diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | c74559fb88da8adac0d9186cfa55a6b13c47695f (patch) | |
tree | b83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/parser | |
parent | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff) | |
download | ydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/parser')
59 files changed, 21205 insertions, 21205 deletions
diff --git a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp index 0fa72ec5f0..7cfa6e11b3 100644 --- a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp +++ b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.cpp @@ -1,160 +1,160 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character classes and their mnemonics. - */ -#include "AsciiComponentClass.h" -#include "Utf8ComponentClass.h" -#include "buildstate.h" -#include "parse_error.h" -#include "position.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "util/charreach_util.h" - -using namespace std; - -namespace ue2 { - -AsciiComponentClass::AsciiComponentClass(const ParseMode &mode_in) - : ComponentClass(mode_in), position(GlushkovBuildState::POS_UNINITIALIZED) { - assert(!mode.utf8); -} - -AsciiComponentClass *AsciiComponentClass::clone() const { - return new AsciiComponentClass(*this); -} - -bool AsciiComponentClass::class_empty(void) const { - assert(finalized); - return cr.none(); -} - -void AsciiComponentClass::createRange(unichar to) { - assert(range_start <= 0xff); - unsigned char from = (u8)range_start; - if (from > to) { - throw LocatedParseError("Range out of order in character class"); - } - - in_cand_range = false; - CharReach ncr(from, to); - if (mode.caseless) { - make_caseless(&ncr); - } - cr |= ncr; - range_start = INVALID_UNICODE; -} - -void AsciiComponentClass::notePositions(GlushkovBuildState &bs) { - // We should always be finalized by now. - assert(finalized); - - NFABuilder &builder = bs.getBuilder(); - position = builder.makePositions(1); - - builder.addCharReach(position, cr); - builder.setNodeReportID(position, 0 /* offset adj */); - recordPosBounds(position, position + 1); -} - -void AsciiComponentClass::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - // all follow set construction is handled by firsts/lasts -} - -void AsciiComponentClass::add(PredefinedClass c, bool negative) { - if (in_cand_range) { // can't form a range here - throw LocatedParseError("Invalid range in character class"); - } - DEBUG_PRINTF("getting %u %s\n", (u32)c, negative ? "^" : ""); - - if (mode.ucp) { - c = translateForUcpMode(c, mode); - } - - // Note: caselessness is handled by getPredefinedCharReach. - CharReach pcr = getPredefinedCharReach(c, mode); - if (negative) { - pcr.flip(); - } - - cr |= pcr; - range_start = INVALID_UNICODE; - in_cand_range = false; -} - -void AsciiComponentClass::add(unichar c) { - DEBUG_PRINTF("adding \\x%02x\n", c); - if (c > 0xff) { // too big! - throw LocatedParseError("Hexadecimal value is greater than \\xFF"); - } - - if (in_cand_range) { - createRange(c); - return; - } - - CharReach ncr(c, c); - if (mode.caseless) { - make_caseless(&ncr); - } - - cr |= ncr; - range_start = c; -} - -void AsciiComponentClass::finalize() { - if (finalized) { - return; - } - - // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the - // dash is a literal dash. - if (in_cand_range) { - cr.set('-'); - in_cand_range = false; - } - - if (m_negate) { - cr.flip(); - } - - finalized = true; -} - -vector<PositionInfo> AsciiComponentClass::first(void) const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -vector<PositionInfo> AsciiComponentClass::last(void) const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character classes and their mnemonics. + */ +#include "AsciiComponentClass.h" +#include "Utf8ComponentClass.h" +#include "buildstate.h" +#include "parse_error.h" +#include "position.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "util/charreach_util.h" + +using namespace std; + +namespace ue2 { + +AsciiComponentClass::AsciiComponentClass(const ParseMode &mode_in) + : ComponentClass(mode_in), position(GlushkovBuildState::POS_UNINITIALIZED) { + assert(!mode.utf8); +} + +AsciiComponentClass *AsciiComponentClass::clone() const { + return new AsciiComponentClass(*this); +} + +bool AsciiComponentClass::class_empty(void) const { + assert(finalized); + return cr.none(); +} + +void AsciiComponentClass::createRange(unichar to) { + assert(range_start <= 0xff); + unsigned char from = (u8)range_start; + if (from > to) { + throw LocatedParseError("Range out of order in character class"); + } + + in_cand_range = false; + CharReach ncr(from, to); + if (mode.caseless) { + make_caseless(&ncr); + } + cr |= ncr; + range_start = INVALID_UNICODE; +} + +void AsciiComponentClass::notePositions(GlushkovBuildState &bs) { + // We should always be finalized by now. + assert(finalized); + + NFABuilder &builder = bs.getBuilder(); + position = builder.makePositions(1); + + builder.addCharReach(position, cr); + builder.setNodeReportID(position, 0 /* offset adj */); + recordPosBounds(position, position + 1); +} + +void AsciiComponentClass::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + // all follow set construction is handled by firsts/lasts +} + +void AsciiComponentClass::add(PredefinedClass c, bool negative) { + if (in_cand_range) { // can't form a range here + throw LocatedParseError("Invalid range in character class"); + } + DEBUG_PRINTF("getting %u %s\n", (u32)c, negative ? "^" : ""); + + if (mode.ucp) { + c = translateForUcpMode(c, mode); + } + + // Note: caselessness is handled by getPredefinedCharReach. + CharReach pcr = getPredefinedCharReach(c, mode); + if (negative) { + pcr.flip(); + } + + cr |= pcr; + range_start = INVALID_UNICODE; + in_cand_range = false; +} + +void AsciiComponentClass::add(unichar c) { + DEBUG_PRINTF("adding \\x%02x\n", c); + if (c > 0xff) { // too big! + throw LocatedParseError("Hexadecimal value is greater than \\xFF"); + } + + if (in_cand_range) { + createRange(c); + return; + } + + CharReach ncr(c, c); + if (mode.caseless) { + make_caseless(&ncr); + } + + cr |= ncr; + range_start = c; +} + +void AsciiComponentClass::finalize() { + if (finalized) { + return; + } + + // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the + // dash is a literal dash. + if (in_cand_range) { + cr.set('-'); + in_cand_range = false; + } + + if (m_negate) { + cr.flip(); + } + + finalized = true; +} + +vector<PositionInfo> AsciiComponentClass::first(void) const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +vector<PositionInfo> AsciiComponentClass::last(void) const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h index 3e6c2dc908..925fa9bff4 100644 --- a/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h +++ b/contrib/libs/hyperscan/src/parser/AsciiComponentClass.h @@ -1,89 +1,89 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character classes and their mnemonics. - */ - -#ifndef ASCIICOMPONENTCLASS_H -#define ASCIICOMPONENTCLASS_H - -#include "ComponentClass.h" -#include "util/charreach.h" - -namespace ue2 { - -class AsciiComponentClass : public ComponentClass { - friend class ConstructLiteralVisitor; - friend class DumpVisitor; - friend class PrintVisitor; - friend class CaselessVisitor; - friend class SimplifyVisitor; - friend class SimplifyCandidatesVisitor; -public: - explicit AsciiComponentClass(const ParseMode &mode_in); - ~AsciiComponentClass() override {} - AsciiComponentClass *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - bool class_empty(void) const override; - void add(PredefinedClass c, bool negative) override; - void add(unichar c) override; - void finalize(void) override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &) override; - std::vector<PositionInfo> first(void) const override; - std::vector<PositionInfo> last(void) const override; - -protected: - void createRange(unichar to) override; - -private: - Position position; - CharReach cr; - - // Private copy ctor. Use clone instead. - AsciiComponentClass(const AsciiComponentClass &other) - : ComponentClass(other), position(other.position), cr(other.cr) {} -}; - -} // namespace ue2 - -#endif // ASCIICOMPONENTCLASS_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character classes and their mnemonics. + */ + +#ifndef ASCIICOMPONENTCLASS_H +#define ASCIICOMPONENTCLASS_H + +#include "ComponentClass.h" +#include "util/charreach.h" + +namespace ue2 { + +class AsciiComponentClass : public ComponentClass { + friend class ConstructLiteralVisitor; + friend class DumpVisitor; + friend class PrintVisitor; + friend class CaselessVisitor; + friend class SimplifyVisitor; + friend class SimplifyCandidatesVisitor; +public: + explicit AsciiComponentClass(const ParseMode &mode_in); + ~AsciiComponentClass() override {} + AsciiComponentClass *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + bool class_empty(void) const override; + void add(PredefinedClass c, bool negative) override; + void add(unichar c) override; + void finalize(void) override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &) override; + std::vector<PositionInfo> first(void) const override; + std::vector<PositionInfo> last(void) const override; + +protected: + void createRange(unichar to) override; + +private: + Position position; + CharReach cr; + + // Private copy ctor. Use clone instead. + AsciiComponentClass(const AsciiComponentClass &other) + : ComponentClass(other), position(other.position), cr(other.cr) {} +}; + +} // namespace ue2 + +#endif // ASCIICOMPONENTCLASS_H diff --git a/contrib/libs/hyperscan/src/parser/Component.cpp b/contrib/libs/hyperscan/src/parser/Component.cpp index 39cc22cb53..b40ce84d38 100644 --- a/contrib/libs/hyperscan/src/parser/Component.cpp +++ b/contrib/libs/hyperscan/src/parser/Component.cpp @@ -1,75 +1,75 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Base class for all components. - */ - - -#include "Component.h" - -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "ue2common.h" - -using namespace std; - -namespace ue2 { - -Component::Component() - : pos_begin(GlushkovBuildState::POS_UNINITIALIZED), - pos_end(GlushkovBuildState::POS_UNINITIALIZED) {} - -Component::~Component() {} - -bool Component::repeatable() const { - return true; -} - -void Component::recordPosBounds(u32 b, u32 e) { - pos_begin = b; - pos_end = e; -} - -void Component::optimise(bool) { -} - -bool Component::vacuous_everywhere(void) const { - return false; -} - -bool Component::checkEmbeddedStartAnchor(bool) const { - return false; -} - -bool Component::checkEmbeddedEndAnchor(bool) const { - return false; -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Base class for all components. + */ + + +#include "Component.h" + +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "ue2common.h" + +using namespace std; + +namespace ue2 { + +Component::Component() + : pos_begin(GlushkovBuildState::POS_UNINITIALIZED), + pos_end(GlushkovBuildState::POS_UNINITIALIZED) {} + +Component::~Component() {} + +bool Component::repeatable() const { + return true; +} + +void Component::recordPosBounds(u32 b, u32 e) { + pos_begin = b; + pos_end = e; +} + +void Component::optimise(bool) { +} + +bool Component::vacuous_everywhere(void) const { + return false; +} + +bool Component::checkEmbeddedStartAnchor(bool) const { + return false; +} + +bool Component::checkEmbeddedEndAnchor(bool) const { + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/Component.h b/contrib/libs/hyperscan/src/parser/Component.h index f7160ae630..1ebce677ca 100644 --- a/contrib/libs/hyperscan/src/parser/Component.h +++ b/contrib/libs/hyperscan/src/parser/Component.h @@ -1,145 +1,145 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Base class for all components. - */ - -#ifndef _RE_COMPONENT_H_ -#define _RE_COMPONENT_H_ - -#include "ComponentVisitor.h" -#include "ConstComponentVisitor.h" - -#include "position.h" -#include "ue2common.h" - -#include <set> -#include <string> -#include <vector> - -namespace ue2 { - -class GlushkovBuildState; -class PositionInfo; - -enum EmptyPathType { - NOT_EMPTY, /**< component must consume characters */ - EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */ - BOUNDARY_PATHS /**< eps paths some with overhanging asserts */ -}; - -/** \brief Base class for regular expression parse tree components. */ -class Component { - friend class DumpVisitor; -public: - /** \brief Constructor. */ - Component(); - - /** \brief Destructor. */ - virtual ~Component(); - - /** \brief Returns a newly-allocated deep copy of this component. */ - virtual Component *clone() const = 0; - - /** \brief Apply the given visitor functor. */ - virtual Component *accept(ComponentVisitor &v) = 0; - - /** \brief Apply the given const visitor functor. */ - virtual void accept(ConstComponentVisitor &v) const = 0; - - /** \brief Glushkov construction First() function. - * \return set of initial positions in this component. */ - virtual std::vector<PositionInfo> first() const = 0; - - /** \brief Glushkov construction Last() function. - * \return set of final positions in this component. */ - virtual std::vector<PositionInfo> last() const = 0; - - /** \brief Glushkov construction Empty() function. - * \return true iff the component accepts epsilon. - * - * Note: ^, $, etc are considered empty. */ - virtual bool empty() const = 0; - - /** \brief True iff epsilon can pass through the component. - * - * Note: ^, $, etc are not vacuous everywhere. */ - virtual bool vacuous_everywhere(void) const; - - /** \brief True iff the component is repeatable on its own, without being - * encapsulated in a sequence first. - * - * This is true for most components, but not for repeats, anchors and word - * boundaries. */ - virtual bool repeatable() const; - - /** \brief Optimisation pass on the component tree. - * - * Called before \ref notePositions. May modify to the component tree. - * Assumes no start of match information is required. - */ - virtual void optimise(bool connected_to_sds); - - /** \brief Informs the Glushkov build process of the positions used by this - * component. */ - virtual void notePositions(GlushkovBuildState &bs) = 0; - - /** \brief Glushkov construction Follow() function. - * - * Constructs (in \a bs) the set of positions in this component reachable - * from the positions in \a lastPos. - * - * \throw ParseError on failure - */ - virtual void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) = 0; - - /** \brief Return value is used for chaining, throws if finds embedded - * anchor. */ - virtual bool checkEmbeddedStartAnchor(bool at_start) const; - - /* \brief Return value is used for chaining, throws if finds embedded - * anchor. */ - virtual bool checkEmbeddedEndAnchor(bool at_end) const; - -protected: - /** \brief Called during \ref notePositions. */ - void recordPosBounds(u32 b, u32 e); - - u32 pos_begin; - u32 pos_end; - - // Protected copy ctor. Use clone instead. - Component(const Component &other) - : pos_begin(other.pos_begin), pos_end(other.pos_end) {} -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Base class for all components. + */ + +#ifndef _RE_COMPONENT_H_ +#define _RE_COMPONENT_H_ + +#include "ComponentVisitor.h" +#include "ConstComponentVisitor.h" + +#include "position.h" +#include "ue2common.h" + +#include <set> +#include <string> +#include <vector> + +namespace ue2 { + +class GlushkovBuildState; +class PositionInfo; + +enum EmptyPathType { + NOT_EMPTY, /**< component must consume characters */ + EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */ + BOUNDARY_PATHS /**< eps paths some with overhanging asserts */ +}; + +/** \brief Base class for regular expression parse tree components. */ +class Component { + friend class DumpVisitor; +public: + /** \brief Constructor. */ + Component(); + + /** \brief Destructor. */ + virtual ~Component(); + + /** \brief Returns a newly-allocated deep copy of this component. */ + virtual Component *clone() const = 0; + + /** \brief Apply the given visitor functor. */ + virtual Component *accept(ComponentVisitor &v) = 0; + + /** \brief Apply the given const visitor functor. */ + virtual void accept(ConstComponentVisitor &v) const = 0; + + /** \brief Glushkov construction First() function. + * \return set of initial positions in this component. */ + virtual std::vector<PositionInfo> first() const = 0; + + /** \brief Glushkov construction Last() function. + * \return set of final positions in this component. */ + virtual std::vector<PositionInfo> last() const = 0; + + /** \brief Glushkov construction Empty() function. + * \return true iff the component accepts epsilon. + * + * Note: ^, $, etc are considered empty. */ + virtual bool empty() const = 0; + + /** \brief True iff epsilon can pass through the component. + * + * Note: ^, $, etc are not vacuous everywhere. */ + virtual bool vacuous_everywhere(void) const; + + /** \brief True iff the component is repeatable on its own, without being + * encapsulated in a sequence first. + * + * This is true for most components, but not for repeats, anchors and word + * boundaries. */ + virtual bool repeatable() const; + + /** \brief Optimisation pass on the component tree. + * + * Called before \ref notePositions. May modify to the component tree. + * Assumes no start of match information is required. + */ + virtual void optimise(bool connected_to_sds); + + /** \brief Informs the Glushkov build process of the positions used by this + * component. */ + virtual void notePositions(GlushkovBuildState &bs) = 0; + + /** \brief Glushkov construction Follow() function. + * + * Constructs (in \a bs) the set of positions in this component reachable + * from the positions in \a lastPos. + * + * \throw ParseError on failure + */ + virtual void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) = 0; + + /** \brief Return value is used for chaining, throws if finds embedded + * anchor. */ + virtual bool checkEmbeddedStartAnchor(bool at_start) const; + + /* \brief Return value is used for chaining, throws if finds embedded + * anchor. */ + virtual bool checkEmbeddedEndAnchor(bool at_end) const; + +protected: + /** \brief Called during \ref notePositions. */ + void recordPosBounds(u32 b, u32 e); + + u32 pos_begin; + u32 pos_end; + + // Protected copy ctor. Use clone instead. + Component(const Component &other) + : pos_begin(other.pos_begin), pos_end(other.pos_end) {} +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp b/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp index c9bd541d55..3e6515fa44 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentAlternation.cpp @@ -1,190 +1,190 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Alternations (foo|bar|baz). - */ - - -#include "ComponentAlternation.h" - -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "ue2common.h" - -#include <algorithm> - -using namespace std; - -namespace ue2 { - -ComponentAlternation::ComponentAlternation() { - // empty -} - -ComponentAlternation::~ComponentAlternation() { - // empty -} - -ComponentAlternation::ComponentAlternation(const ComponentAlternation &other) - : Component(other) { - for (const auto &c : other.children) { - assert(c); - children.push_back(unique_ptr<Component>(c->clone())); - } -} - -ComponentAlternation * ComponentAlternation::clone() const { - return new ComponentAlternation(*this); -} - -Component *ComponentAlternation::accept(ComponentVisitor &v) { - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); - c = (*i)->accept(v); - if (c != child) { - // Child has been replaced (new Component pointer) or we've been - // instructed to delete it (null). - i->reset(c); - } - } - - // Remove deleted children. - children.erase(remove(children.begin(), children.end(), nullptr), - children.end()); - - v.post(this); - return this; -} - -void ComponentAlternation::accept(ConstComponentVisitor &v) const { - v.pre(*this); - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - (*i)->accept(v); - if (i + 1 != e) { - v.during(*this); - } - } - - v.post(*this); -} - -void ComponentAlternation::append(unique_ptr<Component> component) { - children.push_back(move(component)); -} - -vector<PositionInfo> ComponentAlternation::first() const { - // firsts come from all our subcomponents in position order. This will - // maintain left-to-right priority order. - vector<PositionInfo> firsts, subfirsts; - - for (const auto &c : children) { - subfirsts = c->first(); - firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end()); - } - return firsts; -} - -vector<PositionInfo> ComponentAlternation::last() const { - vector<PositionInfo> lasts, sublasts; - - for (const auto &c : children) { - sublasts = c->last(); - lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); - } - return lasts; -} - -bool ComponentAlternation::empty(void) const { - // an alternation can be empty if any of its components are empty - for (const auto &c : children) { - if (c->empty()) { - return true; - } - } - - return false; -} - -void ComponentAlternation::notePositions(GlushkovBuildState &bs) { - u32 pb = bs.getBuilder().numVertices(); - for (auto &c : children) { - c->notePositions(bs); - } - recordPosBounds(pb, bs.getBuilder().numVertices()); -} - -void ComponentAlternation::buildFollowSet(GlushkovBuildState &bs, - const vector<PositionInfo> &lastPos) { - for (auto &c : children) { - c->buildFollowSet(bs, lastPos); - } -} - -bool ComponentAlternation::checkEmbeddedStartAnchor(bool at_start) const { - bool rv = at_start; - for (const auto &c : children) { - rv &= c->checkEmbeddedStartAnchor(at_start); - } - - return rv; -} - -bool ComponentAlternation::checkEmbeddedEndAnchor(bool at_end) const { - bool rv = at_end; - for (const auto &c : children) { - rv &= c->checkEmbeddedEndAnchor(at_end); - } - - return rv; -} - -bool ComponentAlternation::vacuous_everywhere(void) const { - for (const auto &c : children) { - if (c->vacuous_everywhere()) { - return true; - } - } - return false; -} - -void ComponentAlternation::optimise(bool connected_to_sds) { - for (auto &c : children) { - c->optimise(connected_to_sds); - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Alternations (foo|bar|baz). + */ + + +#include "ComponentAlternation.h" + +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "ue2common.h" + +#include <algorithm> + +using namespace std; + +namespace ue2 { + +ComponentAlternation::ComponentAlternation() { + // empty +} + +ComponentAlternation::~ComponentAlternation() { + // empty +} + +ComponentAlternation::ComponentAlternation(const ComponentAlternation &other) + : Component(other) { + for (const auto &c : other.children) { + assert(c); + children.push_back(unique_ptr<Component>(c->clone())); + } +} + +ComponentAlternation * ComponentAlternation::clone() const { + return new ComponentAlternation(*this); +} + +Component *ComponentAlternation::accept(ComponentVisitor &v) { + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + Component *child = i->get(); + c = (*i)->accept(v); + if (c != child) { + // Child has been replaced (new Component pointer) or we've been + // instructed to delete it (null). + i->reset(c); + } + } + + // Remove deleted children. + children.erase(remove(children.begin(), children.end(), nullptr), + children.end()); + + v.post(this); + return this; +} + +void ComponentAlternation::accept(ConstComponentVisitor &v) const { + v.pre(*this); + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + (*i)->accept(v); + if (i + 1 != e) { + v.during(*this); + } + } + + v.post(*this); +} + +void ComponentAlternation::append(unique_ptr<Component> component) { + children.push_back(move(component)); +} + +vector<PositionInfo> ComponentAlternation::first() const { + // firsts come from all our subcomponents in position order. This will + // maintain left-to-right priority order. + vector<PositionInfo> firsts, subfirsts; + + for (const auto &c : children) { + subfirsts = c->first(); + firsts.insert(firsts.end(), subfirsts.begin(), subfirsts.end()); + } + return firsts; +} + +vector<PositionInfo> ComponentAlternation::last() const { + vector<PositionInfo> lasts, sublasts; + + for (const auto &c : children) { + sublasts = c->last(); + lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); + } + return lasts; +} + +bool ComponentAlternation::empty(void) const { + // an alternation can be empty if any of its components are empty + for (const auto &c : children) { + if (c->empty()) { + return true; + } + } + + return false; +} + +void ComponentAlternation::notePositions(GlushkovBuildState &bs) { + u32 pb = bs.getBuilder().numVertices(); + for (auto &c : children) { + c->notePositions(bs); + } + recordPosBounds(pb, bs.getBuilder().numVertices()); +} + +void ComponentAlternation::buildFollowSet(GlushkovBuildState &bs, + const vector<PositionInfo> &lastPos) { + for (auto &c : children) { + c->buildFollowSet(bs, lastPos); + } +} + +bool ComponentAlternation::checkEmbeddedStartAnchor(bool at_start) const { + bool rv = at_start; + for (const auto &c : children) { + rv &= c->checkEmbeddedStartAnchor(at_start); + } + + return rv; +} + +bool ComponentAlternation::checkEmbeddedEndAnchor(bool at_end) const { + bool rv = at_end; + for (const auto &c : children) { + rv &= c->checkEmbeddedEndAnchor(at_end); + } + + return rv; +} + +bool ComponentAlternation::vacuous_everywhere(void) const { + for (const auto &c : children) { + if (c->vacuous_everywhere()) { + return true; + } + } + return false; +} + +void ComponentAlternation::optimise(bool connected_to_sds) { + for (auto &c : children) { + c->optimise(connected_to_sds); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentAlternation.h b/contrib/libs/hyperscan/src/parser/ComponentAlternation.h index cbb168c03d..6c40074850 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAlternation.h +++ b/contrib/libs/hyperscan/src/parser/ComponentAlternation.h @@ -1,79 +1,79 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Alternations (foo|bar|baz). - */ - -#ifndef COMPONENT_ALTERNATION_H -#define COMPONENT_ALTERNATION_H - -#include "Component.h" -#include "position.h" - -#include <memory> - -namespace ue2 { - -class PositionInfo; - -class ComponentAlternation : public Component { - friend class DumpVisitor; - friend class SimplifyVisitor; -public: - ComponentAlternation(); - ~ComponentAlternation() override; - ComponentAlternation *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - size_t numBranches() const { return children.size(); } - - void append(std::unique_ptr<Component> component); - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty(void) const override; - bool vacuous_everywhere() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool checkEmbeddedStartAnchor(bool at_start) const override; - bool checkEmbeddedEndAnchor(bool at_end) const override; - - void optimise(bool connected_to_sds) override; - -private: - std::vector<std::unique_ptr<Component>> children; - - ComponentAlternation(const ComponentAlternation &other); -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Alternations (foo|bar|baz). + */ + +#ifndef COMPONENT_ALTERNATION_H +#define COMPONENT_ALTERNATION_H + +#include "Component.h" +#include "position.h" + +#include <memory> + +namespace ue2 { + +class PositionInfo; + +class ComponentAlternation : public Component { + friend class DumpVisitor; + friend class SimplifyVisitor; +public: + ComponentAlternation(); + ~ComponentAlternation() override; + ComponentAlternation *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + size_t numBranches() const { return children.size(); } + + void append(std::unique_ptr<Component> component); + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty(void) const override; + bool vacuous_everywhere() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool checkEmbeddedStartAnchor(bool at_start) const override; + bool checkEmbeddedEndAnchor(bool at_end) const override; + + void optimise(bool connected_to_sds) override; + +private: + std::vector<std::unique_ptr<Component>> children; + + ComponentAlternation(const ComponentAlternation &other); +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp b/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp index ae023dad5a..cadff93264 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentAssertion.cpp @@ -1,121 +1,121 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Lookahead/lookbehind zero-width assertions. - */ -#include "ComponentAssertion.h" -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "ue2common.h" - -#include <cassert> -#include <algorithm> - -using namespace std; - -namespace ue2 { - -ComponentAssertion::ComponentAssertion(enum Direction dir, enum Sense sense) - : m_dir(dir), m_sense(sense) {} - -ComponentAssertion::~ComponentAssertion() { } - -ComponentAssertion *ComponentAssertion::clone() const { - return new ComponentAssertion(*this); -} - -Component * ComponentAssertion::accept(ComponentVisitor &v) { - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); - c = (*i)->accept(v); - if (c != child) { - // Child has been replaced (new Component pointer) or we've been - // instructed to delete it (null). - i->reset(c); - } - } - - // Remove deleted children. - children.erase(remove(children.begin(), children.end(), nullptr), - children.end()); - - v.post(this); - return this; -} - -void ComponentAssertion::accept(ConstComponentVisitor &v) const { - v.pre(*this); - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - (*i)->accept(v); - if (i + 1 != e) { - v.during(*this); - } - } - - v.post(*this); -} - -vector<PositionInfo> ComponentAssertion::first() const { - assert(0); - return vector<PositionInfo>(); -} - -vector<PositionInfo> ComponentAssertion::last() const { - assert(0); - return vector<PositionInfo>(); -} - -bool ComponentAssertion::empty() const { - return true; -} - -void ComponentAssertion::notePositions(GlushkovBuildState &) { - assert(0); -} - -void ComponentAssertion::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - assert(0); -} - -bool ComponentAssertion::repeatable() const { - // If this assertion has no children (it's an empty sequence, like that - // produced by '(?!)') then PCRE would throw a "nothing to repeat" error. - // So we do as well. - return !children.empty(); -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Lookahead/lookbehind zero-width assertions. + */ +#include "ComponentAssertion.h" +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "ue2common.h" + +#include <cassert> +#include <algorithm> + +using namespace std; + +namespace ue2 { + +ComponentAssertion::ComponentAssertion(enum Direction dir, enum Sense sense) + : m_dir(dir), m_sense(sense) {} + +ComponentAssertion::~ComponentAssertion() { } + +ComponentAssertion *ComponentAssertion::clone() const { + return new ComponentAssertion(*this); +} + +Component * ComponentAssertion::accept(ComponentVisitor &v) { + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + Component *child = i->get(); + c = (*i)->accept(v); + if (c != child) { + // Child has been replaced (new Component pointer) or we've been + // instructed to delete it (null). + i->reset(c); + } + } + + // Remove deleted children. + children.erase(remove(children.begin(), children.end(), nullptr), + children.end()); + + v.post(this); + return this; +} + +void ComponentAssertion::accept(ConstComponentVisitor &v) const { + v.pre(*this); + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + (*i)->accept(v); + if (i + 1 != e) { + v.during(*this); + } + } + + v.post(*this); +} + +vector<PositionInfo> ComponentAssertion::first() const { + assert(0); + return vector<PositionInfo>(); +} + +vector<PositionInfo> ComponentAssertion::last() const { + assert(0); + return vector<PositionInfo>(); +} + +bool ComponentAssertion::empty() const { + return true; +} + +void ComponentAssertion::notePositions(GlushkovBuildState &) { + assert(0); +} + +void ComponentAssertion::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + assert(0); +} + +bool ComponentAssertion::repeatable() const { + // If this assertion has no children (it's an empty sequence, like that + // produced by '(?!)') then PCRE would throw a "nothing to repeat" error. + // So we do as well. + return !children.empty(); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentAssertion.h b/contrib/libs/hyperscan/src/parser/ComponentAssertion.h index 60b38cded0..fc78de0aac 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAssertion.h +++ b/contrib/libs/hyperscan/src/parser/ComponentAssertion.h @@ -1,76 +1,76 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Lookahead/lookbehind zero-width assertions. - */ - -#ifndef _RE_COMPONENTASSERTION_H_ -#define _RE_COMPONENTASSERTION_H_ - -#include "ComponentSequence.h" - -namespace ue2 { - -class ComponentAssertion : public ComponentSequence { - friend class DumpVisitor; - friend class PrintVisitor; -public: - enum Direction { - LOOKAHEAD, //!< lookahead (forward) assertion - LOOKBEHIND //!< lookbehind (backward) assertion - }; - - enum Sense { - POS, //!< positive assertion, (?=...) or (?<=...) - NEG //!< negative assertion, (?!...) or (?<!...) - }; - - ComponentAssertion(enum Direction dir, enum Sense sense); - ~ComponentAssertion() override; - ComponentAssertion *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - - bool empty() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool repeatable() const override; - -private: - enum Direction m_dir; - enum Sense m_sense; -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Lookahead/lookbehind zero-width assertions. + */ + +#ifndef _RE_COMPONENTASSERTION_H_ +#define _RE_COMPONENTASSERTION_H_ + +#include "ComponentSequence.h" + +namespace ue2 { + +class ComponentAssertion : public ComponentSequence { + friend class DumpVisitor; + friend class PrintVisitor; +public: + enum Direction { + LOOKAHEAD, //!< lookahead (forward) assertion + LOOKBEHIND //!< lookbehind (backward) assertion + }; + + enum Sense { + POS, //!< positive assertion, (?=...) or (?<=...) + NEG //!< negative assertion, (?!...) or (?<!...) + }; + + ComponentAssertion(enum Direction dir, enum Sense sense); + ~ComponentAssertion() override; + ComponentAssertion *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + + bool empty() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool repeatable() const override; + +private: + enum Direction m_dir; + enum Sense m_sense; +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp index 986ca6d9ba..106f24fc4d 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.cpp @@ -1,92 +1,92 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Atomic groups (?>...) - */ -#include "ComponentAtomicGroup.h" -#include "buildstate.h" -#include "position.h" - -#include <algorithm> - -using namespace std; - -namespace ue2 { - -ComponentAtomicGroup *ComponentAtomicGroup::clone() const { - return new ComponentAtomicGroup(*this); -} - -Component *ComponentAtomicGroup::accept(ComponentVisitor &v) { - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); - c = (*i)->accept(v); - if (c != child) { - // Child has been replaced (new Component pointer) or we've been - // instructed to delete it (null). - i->reset(c); - } - } - - // Remove deleted children. - children.erase(remove(children.begin(), children.end(), nullptr), - children.end()); - - v.post(this); - return this; -} - -void ComponentAtomicGroup::accept(ConstComponentVisitor &v) const { - v.pre(*this); - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - (*i)->accept(v); - if (i + 1 != e) { - v.during(*this); - } - } - - v.post(*this); -} - -void ComponentAtomicGroup::notePositions(GlushkovBuildState &) { - assert(0); -} - -void ComponentAtomicGroup::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - assert(0); -} - -} // namespace +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Atomic groups (?>...) + */ +#include "ComponentAtomicGroup.h" +#include "buildstate.h" +#include "position.h" + +#include <algorithm> + +using namespace std; + +namespace ue2 { + +ComponentAtomicGroup *ComponentAtomicGroup::clone() const { + return new ComponentAtomicGroup(*this); +} + +Component *ComponentAtomicGroup::accept(ComponentVisitor &v) { + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + Component *child = i->get(); + c = (*i)->accept(v); + if (c != child) { + // Child has been replaced (new Component pointer) or we've been + // instructed to delete it (null). + i->reset(c); + } + } + + // Remove deleted children. + children.erase(remove(children.begin(), children.end(), nullptr), + children.end()); + + v.post(this); + return this; +} + +void ComponentAtomicGroup::accept(ConstComponentVisitor &v) const { + v.pre(*this); + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + (*i)->accept(v); + if (i + 1 != e) { + v.during(*this); + } + } + + v.post(*this); +} + +void ComponentAtomicGroup::notePositions(GlushkovBuildState &) { + assert(0); +} + +void ComponentAtomicGroup::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + assert(0); +} + +} // namespace diff --git a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h index e24fb3d99d..d4eab293d9 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h +++ b/contrib/libs/hyperscan/src/parser/ComponentAtomicGroup.h @@ -1,58 +1,58 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Atomic groups (?>...) - */ - -#ifndef _COMPONENTATOMICGROUP_H_ -#define _COMPONENTATOMICGROUP_H_ - -#include "ComponentSequence.h" - -namespace ue2 { - -// The atomic group component is a subclass of sequence that is only buildable -// in prefilter mode, where we treat it as a standard sequence. -class ComponentAtomicGroup : public ComponentSequence { - friend class DumpVisitor; -public: - ComponentAtomicGroup() {} - ~ComponentAtomicGroup() override {} - ComponentAtomicGroup *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Atomic groups (?>...) + */ + +#ifndef _COMPONENTATOMICGROUP_H_ +#define _COMPONENTATOMICGROUP_H_ + +#include "ComponentSequence.h" + +namespace ue2 { + +// The atomic group component is a subclass of sequence that is only buildable +// in prefilter mode, where we treat it as a standard sequence. +class ComponentAtomicGroup : public ComponentSequence { + friend class DumpVisitor; +public: + ComponentAtomicGroup() {} + ~ComponentAtomicGroup() override {} + ComponentAtomicGroup *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp b/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp index 4cb5f44d23..1edc530d25 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentBackReference.cpp @@ -1,79 +1,79 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Back-references (/([a-f]{3}).*\\1/) - */ - - -#include "ComponentBackReference.h" - -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "util/charreach.h" - -#include <cassert> - -using namespace std; - -namespace ue2 { - -ComponentBackReference::ComponentBackReference(unsigned int id) - : ref_id(id) {} - -ComponentBackReference::ComponentBackReference(const string &s) - : name(s), ref_id(0) {} - -ComponentBackReference * ComponentBackReference::clone() const { - return new ComponentBackReference(*this); -} - -vector<PositionInfo> ComponentBackReference::first() const { - assert(0); - return vector<PositionInfo>(); -} - -vector<PositionInfo> ComponentBackReference::last() const { - assert(0); - return vector<PositionInfo>(); -} - -bool ComponentBackReference::empty(void) const { return true; } - -void ComponentBackReference::notePositions(GlushkovBuildState &) { - assert(0); -} - -void ComponentBackReference::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - assert(0); -} - -} // namespace +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Back-references (/([a-f]{3}).*\\1/) + */ + + +#include "ComponentBackReference.h" + +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "util/charreach.h" + +#include <cassert> + +using namespace std; + +namespace ue2 { + +ComponentBackReference::ComponentBackReference(unsigned int id) + : ref_id(id) {} + +ComponentBackReference::ComponentBackReference(const string &s) + : name(s), ref_id(0) {} + +ComponentBackReference * ComponentBackReference::clone() const { + return new ComponentBackReference(*this); +} + +vector<PositionInfo> ComponentBackReference::first() const { + assert(0); + return vector<PositionInfo>(); +} + +vector<PositionInfo> ComponentBackReference::last() const { + assert(0); + return vector<PositionInfo>(); +} + +bool ComponentBackReference::empty(void) const { return true; } + +void ComponentBackReference::notePositions(GlushkovBuildState &) { + assert(0); +} + +void ComponentBackReference::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + assert(0); +} + +} // namespace diff --git a/contrib/libs/hyperscan/src/parser/ComponentBackReference.h b/contrib/libs/hyperscan/src/parser/ComponentBackReference.h index d8324d3bc4..d22df7a7f6 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentBackReference.h +++ b/contrib/libs/hyperscan/src/parser/ComponentBackReference.h @@ -1,84 +1,84 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Back-references (/([a-f]{3}).*\\1/) - */ - -#ifndef _RE_COMPONENTBACKREFERENCE_H_ -#define _RE_COMPONENTBACKREFERENCE_H_ - -#include "Component.h" -#include <string> - -namespace ue2 { - -class ComponentBackReference : public Component { - friend class DumpVisitor; - friend class PrintVisitor; - friend class ReferenceVisitor; -public: - explicit ComponentBackReference(unsigned int id); - explicit ComponentBackReference(const std::string &s); - ~ComponentBackReference() override {} - ComponentBackReference *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - unsigned int getRefID() const { return ref_id; } - const std::string &getRefName() const { return name; } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty(void) const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - -private: - // Private copy ctor. Use clone instead. - ComponentBackReference(const ComponentBackReference &other) - : Component(other), name(other.name), ref_id(other.ref_id) {} - - std::string name; - unsigned int ref_id; -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Back-references (/([a-f]{3}).*\\1/) + */ + +#ifndef _RE_COMPONENTBACKREFERENCE_H_ +#define _RE_COMPONENTBACKREFERENCE_H_ + +#include "Component.h" +#include <string> + +namespace ue2 { + +class ComponentBackReference : public Component { + friend class DumpVisitor; + friend class PrintVisitor; + friend class ReferenceVisitor; +public: + explicit ComponentBackReference(unsigned int id); + explicit ComponentBackReference(const std::string &s); + ~ComponentBackReference() override {} + ComponentBackReference *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + unsigned int getRefID() const { return ref_id; } + const std::string &getRefName() const { return name; } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty(void) const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + +private: + // Private copy ctor. Use clone instead. + ComponentBackReference(const ComponentBackReference &other) + : Component(other), name(other.name), ref_id(other.ref_id) {} + + std::string name; + unsigned int ref_id; +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp b/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp index 6b1c9038dc..efd6bf88dd 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentBoundary.cpp @@ -1,186 +1,186 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Boundary assertions (^, $, \\A, \\Z, \\z) - */ - - -#include "ComponentBoundary.h" - -#include "buildstate.h" -#include "parse_error.h" -#include "position.h" -#include "position_info.h" -#include "Parser.h" -#include "util/charreach.h" -#include "nfagraph/ng_builder.h" - -#include <cassert> - -using namespace std; - -namespace ue2 { - -ComponentBoundary::ComponentBoundary(enum Boundary bound) - : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {} - -ComponentBoundary::~ComponentBoundary() { -} - -ComponentBoundary::ComponentBoundary(const ComponentBoundary &other) - : Component(other), m_bound(other.m_bound), m_newline(other.m_newline), - m_first(other.m_first), m_last(other.m_last) {} - -ComponentBoundary * ComponentBoundary::clone() const { - return new ComponentBoundary(*this); -} - -vector<PositionInfo> ComponentBoundary::first() const { - return m_first; -} - -vector<PositionInfo> ComponentBoundary::last() const { - return m_last; -} - -bool ComponentBoundary::empty() const { - return true; -} - -bool ComponentBoundary::repeatable() const { - return false; -} - -static -Position makeNewline(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - Position newline = builder.makePositions(1); - builder.addCharReach(newline, CharReach('\n')); - return newline; -} - -void ComponentBoundary::notePositions(GlushkovBuildState & bs) { - NFABuilder &builder = bs.getBuilder(); - const Position startState = builder.getStart(); - - switch (m_bound) { - case BEGIN_STRING: // beginning of data stream ('^') - { - PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); - epsilon.flags = POS_FLAG_NOFLOAT; - m_first.push_back(epsilon); - - // We have the start vertex in firsts so that we can discourage - // the mid-pattern use of boundaries. - m_first.push_back(startState); - - break; - } - case BEGIN_LINE: // multiline anchor: beginning of stream or a newline - { - PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); - epsilon.flags = POS_FLAG_NOFLOAT; - m_first.push_back(epsilon); - - // We have the start vertex in firsts so that we can discourage - // the mid-pattern use of boundaries. - m_first.push_back(startState); - - // Newline - m_newline = makeNewline(bs); - builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START); - builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START); - PositionInfo nl(m_newline); - nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT; - m_first.push_back(nl); - m_last.push_back(nl); - recordPosBounds(m_newline, m_newline + 1); - break; - } - case END_STRING: // end of data stream ('\z') - { - PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); - epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD | - POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; - m_first.push_back(epsilon); - break; - } - case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$') - { - PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); - epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | - POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; - m_first.push_back(epsilon); - break; - } - case END_LINE: // multiline anchor: end of data or a newline - { - PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); - epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | - POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS; - m_first.push_back(epsilon); - break; - } - default: - // unsupported - assert(0); - break; - } -} - -void ComponentBoundary::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - -} - -bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const { - if (at_start) { - return at_start; - } - - if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) { - throw ParseError("Embedded start anchors not supported."); - } - - return at_start; -} - -bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const { - if (at_end) { - return at_end; - } - - if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) { - throw ParseError("Embedded end anchors not supported."); - } - - return at_end; -} - -} // namespace +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Boundary assertions (^, $, \\A, \\Z, \\z) + */ + + +#include "ComponentBoundary.h" + +#include "buildstate.h" +#include "parse_error.h" +#include "position.h" +#include "position_info.h" +#include "Parser.h" +#include "util/charreach.h" +#include "nfagraph/ng_builder.h" + +#include <cassert> + +using namespace std; + +namespace ue2 { + +ComponentBoundary::ComponentBoundary(enum Boundary bound) + : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {} + +ComponentBoundary::~ComponentBoundary() { +} + +ComponentBoundary::ComponentBoundary(const ComponentBoundary &other) + : Component(other), m_bound(other.m_bound), m_newline(other.m_newline), + m_first(other.m_first), m_last(other.m_last) {} + +ComponentBoundary * ComponentBoundary::clone() const { + return new ComponentBoundary(*this); +} + +vector<PositionInfo> ComponentBoundary::first() const { + return m_first; +} + +vector<PositionInfo> ComponentBoundary::last() const { + return m_last; +} + +bool ComponentBoundary::empty() const { + return true; +} + +bool ComponentBoundary::repeatable() const { + return false; +} + +static +Position makeNewline(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + Position newline = builder.makePositions(1); + builder.addCharReach(newline, CharReach('\n')); + return newline; +} + +void ComponentBoundary::notePositions(GlushkovBuildState & bs) { + NFABuilder &builder = bs.getBuilder(); + const Position startState = builder.getStart(); + + switch (m_bound) { + case BEGIN_STRING: // beginning of data stream ('^') + { + PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); + epsilon.flags = POS_FLAG_NOFLOAT; + m_first.push_back(epsilon); + + // We have the start vertex in firsts so that we can discourage + // the mid-pattern use of boundaries. + m_first.push_back(startState); + + break; + } + case BEGIN_LINE: // multiline anchor: beginning of stream or a newline + { + PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); + epsilon.flags = POS_FLAG_NOFLOAT; + m_first.push_back(epsilon); + + // We have the start vertex in firsts so that we can discourage + // the mid-pattern use of boundaries. + m_first.push_back(startState); + + // Newline + m_newline = makeNewline(bs); + builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START); + builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START); + PositionInfo nl(m_newline); + nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT; + m_first.push_back(nl); + m_last.push_back(nl); + recordPosBounds(m_newline, m_newline + 1); + break; + } + case END_STRING: // end of data stream ('\z') + { + PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); + epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD | + POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; + m_first.push_back(epsilon); + break; + } + case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$') + { + PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); + epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | + POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; + m_first.push_back(epsilon); + break; + } + case END_LINE: // multiline anchor: end of data or a newline + { + PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); + epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | + POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS; + m_first.push_back(epsilon); + break; + } + default: + // unsupported + assert(0); + break; + } +} + +void ComponentBoundary::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + +} + +bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const { + if (at_start) { + return at_start; + } + + if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) { + throw ParseError("Embedded start anchors not supported."); + } + + return at_start; +} + +bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const { + if (at_end) { + return at_end; + } + + if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) { + throw ParseError("Embedded end anchors not supported."); + } + + return at_end; +} + +} // namespace diff --git a/contrib/libs/hyperscan/src/parser/ComponentBoundary.h b/contrib/libs/hyperscan/src/parser/ComponentBoundary.h index cdc7c7d4dd..fea158ee17 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentBoundary.h +++ b/contrib/libs/hyperscan/src/parser/ComponentBoundary.h @@ -1,94 +1,94 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Boundary assertions (^, $, \\A, \\Z, \\z) - */ - -#ifndef _RE_COMPONENTBOUNDARY_H_ -#define _RE_COMPONENTBOUNDARY_H_ - -#include "Component.h" -#include "position.h" - -namespace ue2 { - -/** \brief Encapsulates a line/string boundary assertion. */ -class ComponentBoundary : public Component { - friend class DumpVisitor; - friend class PrintVisitor; - friend class UnsafeBoundsVisitor; - friend class MultilineVisitor; -public: - enum Boundary { - BEGIN_STRING, //!< beginning of data stream - END_STRING, //!< end of data stream - END_STRING_OPTIONAL_LF, //!< end of data stream with an optional - // linefeed - BEGIN_LINE, //!< '(^|\\n)' - END_LINE //!< '($|\\n)' - }; - - explicit ComponentBoundary(enum Boundary bound); - ~ComponentBoundary() override; - ComponentBoundary *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty() const override; - bool repeatable() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool checkEmbeddedStartAnchor(bool at_start) const override; - bool checkEmbeddedEndAnchor(bool at_end) const override; - -private: - enum Boundary m_bound; //!< \brief which assertion is that? - Position m_newline; //!< \brief special newline state - std::vector<PositionInfo> m_first; //!< \brief positions returned for first() - std::vector<PositionInfo> m_last; //!< \brief positions returned for last() - - ComponentBoundary(const ComponentBoundary &other); -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Boundary assertions (^, $, \\A, \\Z, \\z) + */ + +#ifndef _RE_COMPONENTBOUNDARY_H_ +#define _RE_COMPONENTBOUNDARY_H_ + +#include "Component.h" +#include "position.h" + +namespace ue2 { + +/** \brief Encapsulates a line/string boundary assertion. */ +class ComponentBoundary : public Component { + friend class DumpVisitor; + friend class PrintVisitor; + friend class UnsafeBoundsVisitor; + friend class MultilineVisitor; +public: + enum Boundary { + BEGIN_STRING, //!< beginning of data stream + END_STRING, //!< end of data stream + END_STRING_OPTIONAL_LF, //!< end of data stream with an optional + // linefeed + BEGIN_LINE, //!< '(^|\\n)' + END_LINE //!< '($|\\n)' + }; + + explicit ComponentBoundary(enum Boundary bound); + ~ComponentBoundary() override; + ComponentBoundary *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty() const override; + bool repeatable() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool checkEmbeddedStartAnchor(bool at_start) const override; + bool checkEmbeddedEndAnchor(bool at_end) const override; + +private: + enum Boundary m_bound; //!< \brief which assertion is that? + Position m_newline; //!< \brief special newline state + std::vector<PositionInfo> m_first; //!< \brief positions returned for first() + std::vector<PositionInfo> m_last; //!< \brief positions returned for last() + + ComponentBoundary(const ComponentBoundary &other); +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentByte.cpp b/contrib/libs/hyperscan/src/parser/ComponentByte.cpp index f4c5b026b5..c55c477284 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentByte.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentByte.cpp @@ -1,70 +1,70 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Single bytes (\\C metachar) - */ - - -#include "ComponentByte.h" - -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "util/charreach.h" - -using namespace std; - -namespace ue2 { - -ComponentByte::ComponentByte() - : position(GlushkovBuildState::POS_UNINITIALIZED) {} - -ComponentByte::~ComponentByte() {} - -ComponentByte *ComponentByte::clone() const { - return new ComponentByte(*this); -} - -vector<PositionInfo> ComponentByte::first() const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -vector<PositionInfo> ComponentByte::last() const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -void ComponentByte::notePositions(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - position = builder.makePositions(1); - builder.addCharReach(position, CharReach::dot()); - builder.setNodeReportID(position, 0 /* offset adj */); -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Single bytes (\\C metachar) + */ + + +#include "ComponentByte.h" + +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "util/charreach.h" + +using namespace std; + +namespace ue2 { + +ComponentByte::ComponentByte() + : position(GlushkovBuildState::POS_UNINITIALIZED) {} + +ComponentByte::~ComponentByte() {} + +ComponentByte *ComponentByte::clone() const { + return new ComponentByte(*this); +} + +vector<PositionInfo> ComponentByte::first() const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +vector<PositionInfo> ComponentByte::last() const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +void ComponentByte::notePositions(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + position = builder.makePositions(1); + builder.addCharReach(position, CharReach::dot()); + builder.setNodeReportID(position, 0 /* offset adj */); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentByte.h b/contrib/libs/hyperscan/src/parser/ComponentByte.h index 2f2f143b16..331e326de4 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentByte.h +++ b/contrib/libs/hyperscan/src/parser/ComponentByte.h @@ -1,80 +1,80 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Single bytes (\\C metachar) - */ - -#ifndef _RE_COMPONENTBYTE_H_ -#define _RE_COMPONENTBYTE_H_ - -#include "Component.h" - -namespace ue2 { - -class ComponentByte : public Component { - friend class DumpVisitor; -public: - ComponentByte(void); - ~ComponentByte() override; - ComponentByte *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - - bool empty() const override { return false; } - - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &, - const std::vector<PositionInfo> &) override { - // all follow set construction is handled by firsts/lasts - return; - } - -private: - Position position; - - ComponentByte(const ComponentByte &other) - : Component(other), position(other.position) {} -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Single bytes (\\C metachar) + */ + +#ifndef _RE_COMPONENTBYTE_H_ +#define _RE_COMPONENTBYTE_H_ + +#include "Component.h" + +namespace ue2 { + +class ComponentByte : public Component { + friend class DumpVisitor; +public: + ComponentByte(void); + ~ComponentByte() override; + ComponentByte *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + + bool empty() const override { return false; } + + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &, + const std::vector<PositionInfo> &) override { + // all follow set construction is handled by firsts/lasts + return; + } + +private: + Position position; + + ComponentByte(const ComponentByte &other) + : Component(other), position(other.position) {} +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentClass.cpp b/contrib/libs/hyperscan/src/parser/ComponentClass.cpp index c61c7de0a2..a91ae979ff 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentClass.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentClass.cpp @@ -1,452 +1,452 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character classes and their mnemonics. - */ -#include "Parser.h" -#include "ComponentClass.h" -#include "AsciiComponentClass.h" -#include "ucp_table.h" -#include "Utf8ComponentClass.h" -#include "util/charreach.h" -#include "util/make_unique.h" - -#include <boost/icl/interval_set.hpp> - -using namespace std; - -namespace ue2 { - -static -CharReach to_cr(const CodePointSet &cps) { - CharReach cr; - for (const auto &cp : cps) { - if (lower(cp) >= CharReach::npos) { - break; - } - - cr.setRange(lower(cp), MIN(upper(cp), CharReach::npos - 1)); - } - - return cr; -} - -CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { - const CharReach lower('a', 'z'); - const CharReach upper('A', 'Z'); - const CharReach number('0', '9'); - switch (c) { - case CLASS_ALNUM: - return lower | upper | number; - case CLASS_ALPHA: - return lower | upper; - case CLASS_ANY: - if (mode.dotall) { - return ~CharReach(); - } else { - return ~CharReach('\n'); - } - case CLASS_ASCII: - return CharReach(0, 127); - case CLASS_BLANK: - return CharReach(" \t"); - case CLASS_CNTRL: - return CharReach(0, 31) | CharReach(127 /* del */); - case CLASS_DIGIT: - return number; - case CLASS_GRAPH: - return CharReach(0x21, 0x7e); - case CLASS_XGRAPH: - return to_cr(getPredefinedCodePointSet(c, mode)); - case CLASS_HORZ: - return CharReach("\x09\x20\xA0"); - case CLASS_LOWER: - if (mode.caseless) { - return lower | upper; - } else { - return lower; - } - case CLASS_PRINT: - return CharReach(0x20, 0x7e); - case CLASS_XPRINT: - return to_cr(getPredefinedCodePointSet(c, mode)); - case CLASS_PUNCT: - return CharReach(0x21, '0' - 1) - | CharReach('9' + 1, 'A' - 1) - | CharReach('Z' + 1, 'a' - 1) - | CharReach('z' + 1, 126); - case CLASS_XPUNCT: - return to_cr(getPredefinedCodePointSet(c, mode)); - case CLASS_SPACE: - return CharReach("\x09\x0a\x0c\x0b\x0d\x20"); - case CLASS_UPPER: - if (mode.caseless) { - return lower | upper; - } else { - return upper; - } - case CLASS_VERT: - return CharReach("\x0a\x0b\x0c\x0d\x85"); - case CLASS_WORD: - return lower | upper | number | CharReach('_'); - case CLASS_XDIGIT: - return CharReach("0123456789abcdefABCDEF"); - case CLASS_UCP_C: - return to_cr(getUcpC()); - case CLASS_UCP_CC: - return to_cr(getUcpCc()); - case CLASS_UCP_CF: - return to_cr(getUcpCf()); - case CLASS_UCP_CN: - return to_cr(getUcpCn()); - case CLASS_UCP_CO: - return to_cr(getUcpCo()); - case CLASS_UCP_CS: - return to_cr(getUcpCs()); - case CLASS_UCP_L: - return to_cr(getUcpL()); - case CLASS_UCP_L_AND: - return to_cr(getUcpL_and()); - case CLASS_UCP_LL: - return to_cr(getUcpLl()); - case CLASS_UCP_LM: - return to_cr(getUcpLm()); - case CLASS_UCP_LO: - return to_cr(getUcpLo()); - case CLASS_UCP_LT: - return to_cr(getUcpLt()); - case CLASS_UCP_LU: - return to_cr(getUcpLu()); - case CLASS_UCP_M: - return to_cr(getUcpM()); - case CLASS_UCP_MC: - return to_cr(getUcpMc()); - case CLASS_UCP_ME: - return to_cr(getUcpMe()); - case CLASS_UCP_MN: - return to_cr(getUcpMn()); - case CLASS_UCP_N: - return to_cr(getUcpN()); - case CLASS_UCP_ND: - return to_cr(getUcpNd()); - case CLASS_UCP_NL: - return to_cr(getUcpNl()); - case CLASS_UCP_NO: - return to_cr(getUcpNo()); - case CLASS_UCP_P: - return to_cr(getUcpP()); - case CLASS_UCP_PC: - return to_cr(getUcpPc()); - case CLASS_UCP_PD: - return to_cr(getUcpPd()); - case CLASS_UCP_PE: - return to_cr(getUcpPe()); - case CLASS_UCP_PF: - return to_cr(getUcpPf()); - case CLASS_UCP_PI: - return to_cr(getUcpPi()); - case CLASS_UCP_PO: - return to_cr(getUcpPo()); - case CLASS_UCP_PS: - return to_cr(getUcpPs()); - case CLASS_UCP_S: - return to_cr(getUcpS()); - case CLASS_UCP_SC: - return to_cr(getUcpSc()); - case CLASS_UCP_SK: - return to_cr(getUcpSk()); - case CLASS_UCP_SM: - return to_cr(getUcpSm()); - case CLASS_UCP_SO: - return to_cr(getUcpSo()); - case CLASS_UCP_XAN: - return to_cr(getUcpXan()); - case CLASS_UCP_XPS: - case CLASS_UCP_XSP: - return getPredefinedCharReach(CLASS_VERT, mode) | getPredefinedCharReach(CLASS_HORZ, mode); - case CLASS_UCP_XWD: - return to_cr(getUcpXwd()); - case CLASS_UCP_Z: - return to_cr(getUcpZ()); - case CLASS_UCP_ZL: - return to_cr(getUcpZl()); - case CLASS_UCP_ZP: - return to_cr(getUcpZp()); - case CLASS_UCP_ZS: - return to_cr(getUcpZs()); - case CLASS_SCRIPT_ARABIC: - return to_cr(getUcpArabic()); - case CLASS_SCRIPT_ARMENIAN: - return to_cr(getUcpArmenian()); - case CLASS_SCRIPT_AVESTAN: - return to_cr(getUcpAvestan()); - case CLASS_SCRIPT_BALINESE: - return to_cr(getUcpBalinese()); - case CLASS_SCRIPT_BAMUM: - return to_cr(getUcpBamum()); - case CLASS_SCRIPT_BATAK: - return to_cr(getUcpBatak()); - case CLASS_SCRIPT_BENGALI: - return to_cr(getUcpBengali()); - case CLASS_SCRIPT_BOPOMOFO: - return to_cr(getUcpBopomofo()); - case CLASS_SCRIPT_BRAHMI: - return to_cr(getUcpBrahmi()); - case CLASS_SCRIPT_BRAILLE: - return to_cr(getUcpBraille()); - case CLASS_SCRIPT_BUGINESE: - return to_cr(getUcpBuginese()); - case CLASS_SCRIPT_BUHID: - return to_cr(getUcpBuhid()); - case CLASS_SCRIPT_CANADIAN_ABORIGINAL: - return to_cr(getUcpCanadian_Aboriginal()); - case CLASS_SCRIPT_CARIAN: - return to_cr(getUcpCarian()); - case CLASS_SCRIPT_CHAM: - return to_cr(getUcpCham()); - case CLASS_SCRIPT_CHEROKEE: - return to_cr(getUcpCherokee()); - case CLASS_SCRIPT_COMMON: - return to_cr(getUcpCommon()); - case CLASS_SCRIPT_COPTIC: - return to_cr(getUcpCoptic()); - case CLASS_SCRIPT_CUNEIFORM: - return to_cr(getUcpCuneiform()); - case CLASS_SCRIPT_CYPRIOT: - return to_cr(getUcpCypriot()); - case CLASS_SCRIPT_CYRILLIC: - return to_cr(getUcpCyrillic()); - case CLASS_SCRIPT_DESERET: - return to_cr(getUcpDeseret()); - case CLASS_SCRIPT_DEVANAGARI: - return to_cr(getUcpDevanagari()); - case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS: - return to_cr(getUcpEgyptian_Hieroglyphs()); - case CLASS_SCRIPT_ETHIOPIC: - return to_cr(getUcpEthiopic()); - case CLASS_SCRIPT_GEORGIAN: - return to_cr(getUcpGeorgian()); - case CLASS_SCRIPT_GLAGOLITIC: - return to_cr(getUcpGlagolitic()); - case CLASS_SCRIPT_GOTHIC: - return to_cr(getUcpGothic()); - case CLASS_SCRIPT_GREEK: - return to_cr(getUcpGreek()); - case CLASS_SCRIPT_GUJARATI: - return to_cr(getUcpGujarati()); - case CLASS_SCRIPT_GURMUKHI: - return to_cr(getUcpGurmukhi()); - case CLASS_SCRIPT_HAN: - return to_cr(getUcpHan()); - case CLASS_SCRIPT_HANGUL: - return to_cr(getUcpHangul()); - case CLASS_SCRIPT_HANUNOO: - return to_cr(getUcpHanunoo()); - case CLASS_SCRIPT_HEBREW: - return to_cr(getUcpHebrew()); - case CLASS_SCRIPT_HIRAGANA: - return to_cr(getUcpHiragana()); - case CLASS_SCRIPT_IMPERIAL_ARAMAIC: - return to_cr(getUcpImperial_Aramaic()); - case CLASS_SCRIPT_INHERITED: - return to_cr(getUcpInherited()); - case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI: - return to_cr(getUcpInscriptional_Pahlavi()); - case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN: - return to_cr(getUcpInscriptional_Parthian()); - case CLASS_SCRIPT_JAVANESE: - return to_cr(getUcpJavanese()); - case CLASS_SCRIPT_KAITHI: - return to_cr(getUcpKaithi()); - case CLASS_SCRIPT_KANNADA: - return to_cr(getUcpKannada()); - case CLASS_SCRIPT_KATAKANA: - return to_cr(getUcpKatakana()); - case CLASS_SCRIPT_KAYAH_LI: - return to_cr(getUcpKayah_Li()); - case CLASS_SCRIPT_KHAROSHTHI: - return to_cr(getUcpKharoshthi()); - case CLASS_SCRIPT_KHMER: - return to_cr(getUcpKhmer()); - case CLASS_SCRIPT_LAO: - return to_cr(getUcpLao()); - case CLASS_SCRIPT_LATIN: - return to_cr(getUcpLatin()); - case CLASS_SCRIPT_LEPCHA: - return to_cr(getUcpLepcha()); - case CLASS_SCRIPT_LIMBU: - return to_cr(getUcpLimbu()); - case CLASS_SCRIPT_LINEAR_B: - return to_cr(getUcpLinear_B()); - case CLASS_SCRIPT_LISU: - return to_cr(getUcpLisu()); - case CLASS_SCRIPT_LYCIAN: - return to_cr(getUcpLycian()); - case CLASS_SCRIPT_LYDIAN: - return to_cr(getUcpLydian()); - case CLASS_SCRIPT_MALAYALAM: - return to_cr(getUcpMalayalam()); - case CLASS_SCRIPT_MANDAIC: - return to_cr(getUcpMandaic()); - case CLASS_SCRIPT_MEETEI_MAYEK: - return to_cr(getUcpMeetei_Mayek()); - case CLASS_SCRIPT_MONGOLIAN: - return to_cr(getUcpMongolian()); - case CLASS_SCRIPT_MYANMAR: - return to_cr(getUcpMyanmar()); - case CLASS_SCRIPT_NEW_TAI_LUE: - return to_cr(getUcpNew_Tai_Lue()); - case CLASS_SCRIPT_NKO: - return to_cr(getUcpNko()); - case CLASS_SCRIPT_OGHAM: - return to_cr(getUcpOgham()); - case CLASS_SCRIPT_OL_CHIKI: - return to_cr(getUcpOl_Chiki()); - case CLASS_SCRIPT_OLD_ITALIC: - return to_cr(getUcpOld_Italic()); - case CLASS_SCRIPT_OLD_PERSIAN: - return to_cr(getUcpOld_Persian()); - case CLASS_SCRIPT_OLD_SOUTH_ARABIAN: - return to_cr(getUcpOld_South_Arabian()); - case CLASS_SCRIPT_OLD_TURKIC: - return to_cr(getUcpOld_Turkic()); - case CLASS_SCRIPT_ORIYA: - return to_cr(getUcpOriya()); - case CLASS_SCRIPT_OSMANYA: - return to_cr(getUcpOsmanya()); - case CLASS_SCRIPT_PHAGS_PA: - return to_cr(getUcpPhags_Pa()); - case CLASS_SCRIPT_PHOENICIAN: - return to_cr(getUcpPhoenician()); - case CLASS_SCRIPT_REJANG: - return to_cr(getUcpRejang()); - case CLASS_SCRIPT_RUNIC: - return to_cr(getUcpRunic()); - case CLASS_SCRIPT_SAMARITAN: - return to_cr(getUcpSamaritan()); - case CLASS_SCRIPT_SAURASHTRA: - return to_cr(getUcpSaurashtra()); - case CLASS_SCRIPT_SHAVIAN: - return to_cr(getUcpShavian()); - case CLASS_SCRIPT_SINHALA: - return to_cr(getUcpSinhala()); - case CLASS_SCRIPT_SUNDANESE: - return to_cr(getUcpSundanese()); - case CLASS_SCRIPT_SYLOTI_NAGRI: - return to_cr(getUcpSyloti_Nagri()); - case CLASS_SCRIPT_SYRIAC: - return to_cr(getUcpSyriac()); - case CLASS_SCRIPT_TAGALOG: - return to_cr(getUcpTagalog()); - case CLASS_SCRIPT_TAGBANWA: - return to_cr(getUcpTagbanwa()); - case CLASS_SCRIPT_TAI_LE: - return to_cr(getUcpTai_Le()); - case CLASS_SCRIPT_TAI_THAM: - return to_cr(getUcpTai_Tham()); - case CLASS_SCRIPT_TAI_VIET: - return to_cr(getUcpTai_Viet()); - case CLASS_SCRIPT_TAMIL: - return to_cr(getUcpTamil()); - case CLASS_SCRIPT_TELUGU: - return to_cr(getUcpTelugu()); - case CLASS_SCRIPT_THAANA: - return to_cr(getUcpThaana()); - case CLASS_SCRIPT_THAI: - return to_cr(getUcpThai()); - case CLASS_SCRIPT_TIBETAN: - return to_cr(getUcpTibetan()); - case CLASS_SCRIPT_TIFINAGH: - return to_cr(getUcpTifinagh()); - case CLASS_SCRIPT_UGARITIC: - return to_cr(getUcpUgaritic()); - case CLASS_SCRIPT_VAI: - return to_cr(getUcpVai()); - case CLASS_SCRIPT_YI: - return to_cr(getUcpYi()); - case CLASS_UCP_ANY: /* always include newline */ - return ~CharReach(); - } - assert(0); - return CharReach(); -} - -unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode) { - if (mode.utf8) { - return ue2::make_unique<UTF8ComponentClass>(mode); - } else { - return ue2::make_unique<AsciiComponentClass>(mode); - } -} - -unique_ptr<ComponentClass> generateComponent(PredefinedClass c, bool negate, - const ParseMode &mode) { - auto cc = getComponentClass(mode); - cc->add(c, negate); - cc->finalize(); - return cc; -} - -unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c, - bool nocase) { - ParseMode mode; - mode.caseless = nocase; - auto cc = getComponentClass(mode); - cc->add(c); - cc->finalize(); - return cc; -} - -ComponentClass::ComponentClass(const ParseMode &mode_in) - : m_negate(false), mode(mode_in), in_cand_range(false), - range_start(INVALID_UNICODE), finalized(false) {} - -ComponentClass::~ComponentClass() { } - -void ComponentClass::addDash(void) { - if (!in_cand_range) { - // this could be the start of a range - if (range_start != INVALID_UNICODE) { - in_cand_range = true; - } else { - /* no possible start character for range, this is just a literal */ - add('-'); - } - } else { - // already creating a range, so this must be literal '-' - in_cand_range = false; - createRange('-'); - } -} - -void ComponentClass::negate() { - m_negate = true; -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character classes and their mnemonics. + */ +#include "Parser.h" +#include "ComponentClass.h" +#include "AsciiComponentClass.h" +#include "ucp_table.h" +#include "Utf8ComponentClass.h" +#include "util/charreach.h" +#include "util/make_unique.h" + +#include <boost/icl/interval_set.hpp> + +using namespace std; + +namespace ue2 { + +static +CharReach to_cr(const CodePointSet &cps) { + CharReach cr; + for (const auto &cp : cps) { + if (lower(cp) >= CharReach::npos) { + break; + } + + cr.setRange(lower(cp), MIN(upper(cp), CharReach::npos - 1)); + } + + return cr; +} + +CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode) { + const CharReach lower('a', 'z'); + const CharReach upper('A', 'Z'); + const CharReach number('0', '9'); + switch (c) { + case CLASS_ALNUM: + return lower | upper | number; + case CLASS_ALPHA: + return lower | upper; + case CLASS_ANY: + if (mode.dotall) { + return ~CharReach(); + } else { + return ~CharReach('\n'); + } + case CLASS_ASCII: + return CharReach(0, 127); + case CLASS_BLANK: + return CharReach(" \t"); + case CLASS_CNTRL: + return CharReach(0, 31) | CharReach(127 /* del */); + case CLASS_DIGIT: + return number; + case CLASS_GRAPH: + return CharReach(0x21, 0x7e); + case CLASS_XGRAPH: + return to_cr(getPredefinedCodePointSet(c, mode)); + case CLASS_HORZ: + return CharReach("\x09\x20\xA0"); + case CLASS_LOWER: + if (mode.caseless) { + return lower | upper; + } else { + return lower; + } + case CLASS_PRINT: + return CharReach(0x20, 0x7e); + case CLASS_XPRINT: + return to_cr(getPredefinedCodePointSet(c, mode)); + case CLASS_PUNCT: + return CharReach(0x21, '0' - 1) + | CharReach('9' + 1, 'A' - 1) + | CharReach('Z' + 1, 'a' - 1) + | CharReach('z' + 1, 126); + case CLASS_XPUNCT: + return to_cr(getPredefinedCodePointSet(c, mode)); + case CLASS_SPACE: + return CharReach("\x09\x0a\x0c\x0b\x0d\x20"); + case CLASS_UPPER: + if (mode.caseless) { + return lower | upper; + } else { + return upper; + } + case CLASS_VERT: + return CharReach("\x0a\x0b\x0c\x0d\x85"); + case CLASS_WORD: + return lower | upper | number | CharReach('_'); + case CLASS_XDIGIT: + return CharReach("0123456789abcdefABCDEF"); + case CLASS_UCP_C: + return to_cr(getUcpC()); + case CLASS_UCP_CC: + return to_cr(getUcpCc()); + case CLASS_UCP_CF: + return to_cr(getUcpCf()); + case CLASS_UCP_CN: + return to_cr(getUcpCn()); + case CLASS_UCP_CO: + return to_cr(getUcpCo()); + case CLASS_UCP_CS: + return to_cr(getUcpCs()); + case CLASS_UCP_L: + return to_cr(getUcpL()); + case CLASS_UCP_L_AND: + return to_cr(getUcpL_and()); + case CLASS_UCP_LL: + return to_cr(getUcpLl()); + case CLASS_UCP_LM: + return to_cr(getUcpLm()); + case CLASS_UCP_LO: + return to_cr(getUcpLo()); + case CLASS_UCP_LT: + return to_cr(getUcpLt()); + case CLASS_UCP_LU: + return to_cr(getUcpLu()); + case CLASS_UCP_M: + return to_cr(getUcpM()); + case CLASS_UCP_MC: + return to_cr(getUcpMc()); + case CLASS_UCP_ME: + return to_cr(getUcpMe()); + case CLASS_UCP_MN: + return to_cr(getUcpMn()); + case CLASS_UCP_N: + return to_cr(getUcpN()); + case CLASS_UCP_ND: + return to_cr(getUcpNd()); + case CLASS_UCP_NL: + return to_cr(getUcpNl()); + case CLASS_UCP_NO: + return to_cr(getUcpNo()); + case CLASS_UCP_P: + return to_cr(getUcpP()); + case CLASS_UCP_PC: + return to_cr(getUcpPc()); + case CLASS_UCP_PD: + return to_cr(getUcpPd()); + case CLASS_UCP_PE: + return to_cr(getUcpPe()); + case CLASS_UCP_PF: + return to_cr(getUcpPf()); + case CLASS_UCP_PI: + return to_cr(getUcpPi()); + case CLASS_UCP_PO: + return to_cr(getUcpPo()); + case CLASS_UCP_PS: + return to_cr(getUcpPs()); + case CLASS_UCP_S: + return to_cr(getUcpS()); + case CLASS_UCP_SC: + return to_cr(getUcpSc()); + case CLASS_UCP_SK: + return to_cr(getUcpSk()); + case CLASS_UCP_SM: + return to_cr(getUcpSm()); + case CLASS_UCP_SO: + return to_cr(getUcpSo()); + case CLASS_UCP_XAN: + return to_cr(getUcpXan()); + case CLASS_UCP_XPS: + case CLASS_UCP_XSP: + return getPredefinedCharReach(CLASS_VERT, mode) | getPredefinedCharReach(CLASS_HORZ, mode); + case CLASS_UCP_XWD: + return to_cr(getUcpXwd()); + case CLASS_UCP_Z: + return to_cr(getUcpZ()); + case CLASS_UCP_ZL: + return to_cr(getUcpZl()); + case CLASS_UCP_ZP: + return to_cr(getUcpZp()); + case CLASS_UCP_ZS: + return to_cr(getUcpZs()); + case CLASS_SCRIPT_ARABIC: + return to_cr(getUcpArabic()); + case CLASS_SCRIPT_ARMENIAN: + return to_cr(getUcpArmenian()); + case CLASS_SCRIPT_AVESTAN: + return to_cr(getUcpAvestan()); + case CLASS_SCRIPT_BALINESE: + return to_cr(getUcpBalinese()); + case CLASS_SCRIPT_BAMUM: + return to_cr(getUcpBamum()); + case CLASS_SCRIPT_BATAK: + return to_cr(getUcpBatak()); + case CLASS_SCRIPT_BENGALI: + return to_cr(getUcpBengali()); + case CLASS_SCRIPT_BOPOMOFO: + return to_cr(getUcpBopomofo()); + case CLASS_SCRIPT_BRAHMI: + return to_cr(getUcpBrahmi()); + case CLASS_SCRIPT_BRAILLE: + return to_cr(getUcpBraille()); + case CLASS_SCRIPT_BUGINESE: + return to_cr(getUcpBuginese()); + case CLASS_SCRIPT_BUHID: + return to_cr(getUcpBuhid()); + case CLASS_SCRIPT_CANADIAN_ABORIGINAL: + return to_cr(getUcpCanadian_Aboriginal()); + case CLASS_SCRIPT_CARIAN: + return to_cr(getUcpCarian()); + case CLASS_SCRIPT_CHAM: + return to_cr(getUcpCham()); + case CLASS_SCRIPT_CHEROKEE: + return to_cr(getUcpCherokee()); + case CLASS_SCRIPT_COMMON: + return to_cr(getUcpCommon()); + case CLASS_SCRIPT_COPTIC: + return to_cr(getUcpCoptic()); + case CLASS_SCRIPT_CUNEIFORM: + return to_cr(getUcpCuneiform()); + case CLASS_SCRIPT_CYPRIOT: + return to_cr(getUcpCypriot()); + case CLASS_SCRIPT_CYRILLIC: + return to_cr(getUcpCyrillic()); + case CLASS_SCRIPT_DESERET: + return to_cr(getUcpDeseret()); + case CLASS_SCRIPT_DEVANAGARI: + return to_cr(getUcpDevanagari()); + case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS: + return to_cr(getUcpEgyptian_Hieroglyphs()); + case CLASS_SCRIPT_ETHIOPIC: + return to_cr(getUcpEthiopic()); + case CLASS_SCRIPT_GEORGIAN: + return to_cr(getUcpGeorgian()); + case CLASS_SCRIPT_GLAGOLITIC: + return to_cr(getUcpGlagolitic()); + case CLASS_SCRIPT_GOTHIC: + return to_cr(getUcpGothic()); + case CLASS_SCRIPT_GREEK: + return to_cr(getUcpGreek()); + case CLASS_SCRIPT_GUJARATI: + return to_cr(getUcpGujarati()); + case CLASS_SCRIPT_GURMUKHI: + return to_cr(getUcpGurmukhi()); + case CLASS_SCRIPT_HAN: + return to_cr(getUcpHan()); + case CLASS_SCRIPT_HANGUL: + return to_cr(getUcpHangul()); + case CLASS_SCRIPT_HANUNOO: + return to_cr(getUcpHanunoo()); + case CLASS_SCRIPT_HEBREW: + return to_cr(getUcpHebrew()); + case CLASS_SCRIPT_HIRAGANA: + return to_cr(getUcpHiragana()); + case CLASS_SCRIPT_IMPERIAL_ARAMAIC: + return to_cr(getUcpImperial_Aramaic()); + case CLASS_SCRIPT_INHERITED: + return to_cr(getUcpInherited()); + case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI: + return to_cr(getUcpInscriptional_Pahlavi()); + case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN: + return to_cr(getUcpInscriptional_Parthian()); + case CLASS_SCRIPT_JAVANESE: + return to_cr(getUcpJavanese()); + case CLASS_SCRIPT_KAITHI: + return to_cr(getUcpKaithi()); + case CLASS_SCRIPT_KANNADA: + return to_cr(getUcpKannada()); + case CLASS_SCRIPT_KATAKANA: + return to_cr(getUcpKatakana()); + case CLASS_SCRIPT_KAYAH_LI: + return to_cr(getUcpKayah_Li()); + case CLASS_SCRIPT_KHAROSHTHI: + return to_cr(getUcpKharoshthi()); + case CLASS_SCRIPT_KHMER: + return to_cr(getUcpKhmer()); + case CLASS_SCRIPT_LAO: + return to_cr(getUcpLao()); + case CLASS_SCRIPT_LATIN: + return to_cr(getUcpLatin()); + case CLASS_SCRIPT_LEPCHA: + return to_cr(getUcpLepcha()); + case CLASS_SCRIPT_LIMBU: + return to_cr(getUcpLimbu()); + case CLASS_SCRIPT_LINEAR_B: + return to_cr(getUcpLinear_B()); + case CLASS_SCRIPT_LISU: + return to_cr(getUcpLisu()); + case CLASS_SCRIPT_LYCIAN: + return to_cr(getUcpLycian()); + case CLASS_SCRIPT_LYDIAN: + return to_cr(getUcpLydian()); + case CLASS_SCRIPT_MALAYALAM: + return to_cr(getUcpMalayalam()); + case CLASS_SCRIPT_MANDAIC: + return to_cr(getUcpMandaic()); + case CLASS_SCRIPT_MEETEI_MAYEK: + return to_cr(getUcpMeetei_Mayek()); + case CLASS_SCRIPT_MONGOLIAN: + return to_cr(getUcpMongolian()); + case CLASS_SCRIPT_MYANMAR: + return to_cr(getUcpMyanmar()); + case CLASS_SCRIPT_NEW_TAI_LUE: + return to_cr(getUcpNew_Tai_Lue()); + case CLASS_SCRIPT_NKO: + return to_cr(getUcpNko()); + case CLASS_SCRIPT_OGHAM: + return to_cr(getUcpOgham()); + case CLASS_SCRIPT_OL_CHIKI: + return to_cr(getUcpOl_Chiki()); + case CLASS_SCRIPT_OLD_ITALIC: + return to_cr(getUcpOld_Italic()); + case CLASS_SCRIPT_OLD_PERSIAN: + return to_cr(getUcpOld_Persian()); + case CLASS_SCRIPT_OLD_SOUTH_ARABIAN: + return to_cr(getUcpOld_South_Arabian()); + case CLASS_SCRIPT_OLD_TURKIC: + return to_cr(getUcpOld_Turkic()); + case CLASS_SCRIPT_ORIYA: + return to_cr(getUcpOriya()); + case CLASS_SCRIPT_OSMANYA: + return to_cr(getUcpOsmanya()); + case CLASS_SCRIPT_PHAGS_PA: + return to_cr(getUcpPhags_Pa()); + case CLASS_SCRIPT_PHOENICIAN: + return to_cr(getUcpPhoenician()); + case CLASS_SCRIPT_REJANG: + return to_cr(getUcpRejang()); + case CLASS_SCRIPT_RUNIC: + return to_cr(getUcpRunic()); + case CLASS_SCRIPT_SAMARITAN: + return to_cr(getUcpSamaritan()); + case CLASS_SCRIPT_SAURASHTRA: + return to_cr(getUcpSaurashtra()); + case CLASS_SCRIPT_SHAVIAN: + return to_cr(getUcpShavian()); + case CLASS_SCRIPT_SINHALA: + return to_cr(getUcpSinhala()); + case CLASS_SCRIPT_SUNDANESE: + return to_cr(getUcpSundanese()); + case CLASS_SCRIPT_SYLOTI_NAGRI: + return to_cr(getUcpSyloti_Nagri()); + case CLASS_SCRIPT_SYRIAC: + return to_cr(getUcpSyriac()); + case CLASS_SCRIPT_TAGALOG: + return to_cr(getUcpTagalog()); + case CLASS_SCRIPT_TAGBANWA: + return to_cr(getUcpTagbanwa()); + case CLASS_SCRIPT_TAI_LE: + return to_cr(getUcpTai_Le()); + case CLASS_SCRIPT_TAI_THAM: + return to_cr(getUcpTai_Tham()); + case CLASS_SCRIPT_TAI_VIET: + return to_cr(getUcpTai_Viet()); + case CLASS_SCRIPT_TAMIL: + return to_cr(getUcpTamil()); + case CLASS_SCRIPT_TELUGU: + return to_cr(getUcpTelugu()); + case CLASS_SCRIPT_THAANA: + return to_cr(getUcpThaana()); + case CLASS_SCRIPT_THAI: + return to_cr(getUcpThai()); + case CLASS_SCRIPT_TIBETAN: + return to_cr(getUcpTibetan()); + case CLASS_SCRIPT_TIFINAGH: + return to_cr(getUcpTifinagh()); + case CLASS_SCRIPT_UGARITIC: + return to_cr(getUcpUgaritic()); + case CLASS_SCRIPT_VAI: + return to_cr(getUcpVai()); + case CLASS_SCRIPT_YI: + return to_cr(getUcpYi()); + case CLASS_UCP_ANY: /* always include newline */ + return ~CharReach(); + } + assert(0); + return CharReach(); +} + +unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode) { + if (mode.utf8) { + return ue2::make_unique<UTF8ComponentClass>(mode); + } else { + return ue2::make_unique<AsciiComponentClass>(mode); + } +} + +unique_ptr<ComponentClass> generateComponent(PredefinedClass c, bool negate, + const ParseMode &mode) { + auto cc = getComponentClass(mode); + cc->add(c, negate); + cc->finalize(); + return cc; +} + +unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c, + bool nocase) { + ParseMode mode; + mode.caseless = nocase; + auto cc = getComponentClass(mode); + cc->add(c); + cc->finalize(); + return cc; +} + +ComponentClass::ComponentClass(const ParseMode &mode_in) + : m_negate(false), mode(mode_in), in_cand_range(false), + range_start(INVALID_UNICODE), finalized(false) {} + +ComponentClass::~ComponentClass() { } + +void ComponentClass::addDash(void) { + if (!in_cand_range) { + // this could be the start of a range + if (range_start != INVALID_UNICODE) { + in_cand_range = true; + } else { + /* no possible start character for range, this is just a literal */ + add('-'); + } + } else { + // already creating a range, so this must be literal '-' + in_cand_range = false; + createRange('-'); + } +} + +void ComponentClass::negate() { + m_negate = true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentClass.h b/contrib/libs/hyperscan/src/parser/ComponentClass.h index 34330a1c29..040e6d786c 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentClass.h +++ b/contrib/libs/hyperscan/src/parser/ComponentClass.h @@ -1,280 +1,280 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character classes and their mnemonics. - */ - -#ifndef COMPONENTCLASS_H -#define COMPONENTCLASS_H - -#include <string> -#include <vector> -#include <utility> - -#include "Component.h" -#include "Parser.h" -#include "util/charreach.h" -#include "util/unicode_def.h" -#include "ue2common.h" - -namespace ue2 { - -enum PredefinedClass { - CLASS_ALNUM, - CLASS_ALPHA, - CLASS_ANY, /* dot, not quite any when not in dotall mode */ - CLASS_ASCII, - CLASS_BLANK, - CLASS_CNTRL, - CLASS_DIGIT, - CLASS_GRAPH, - CLASS_HORZ, - CLASS_LOWER, - CLASS_PRINT, - CLASS_PUNCT, - CLASS_SPACE, /* has vertical tab */ - CLASS_UPPER, - CLASS_VERT, - CLASS_WORD, - CLASS_XDIGIT, - CLASS_XGRAPH, /* [:graph:] in UCP mode */ - CLASS_XPRINT, /* [:print:] in UCP mode */ - CLASS_XPUNCT, /* [:punct:] in UCP mode */ - CLASS_UCP_C, - CLASS_UCP_CC, - CLASS_UCP_CF, - CLASS_UCP_CN, /* unallocated code points */ - CLASS_UCP_CO, - CLASS_UCP_CS, /* does not contain valid unicode codepoints */ - CLASS_UCP_L, - CLASS_UCP_LL, - CLASS_UCP_LM, - CLASS_UCP_LO, - CLASS_UCP_LT, - CLASS_UCP_LU, - CLASS_UCP_L_AND, /* L& = LL+LU+LT */ - CLASS_UCP_M, - CLASS_UCP_MC, - CLASS_UCP_ME, - CLASS_UCP_MN, - CLASS_UCP_N, - CLASS_UCP_ND, - CLASS_UCP_NL, - CLASS_UCP_NO, - CLASS_UCP_P, - CLASS_UCP_PC, - CLASS_UCP_PD, - CLASS_UCP_PE, - CLASS_UCP_PF, - CLASS_UCP_PI, - CLASS_UCP_PO, - CLASS_UCP_PS, - CLASS_UCP_S, - CLASS_UCP_SC, - CLASS_UCP_SK, - CLASS_UCP_SM, - CLASS_UCP_SO, - CLASS_UCP_Z, - CLASS_UCP_ZL, - CLASS_UCP_ZP, - CLASS_UCP_ZS, - CLASS_UCP_XAN, - CLASS_UCP_XPS, /* CLASS_SPACE */ - CLASS_UCP_XSP, - CLASS_UCP_XWD, - CLASS_SCRIPT_ARABIC, - CLASS_SCRIPT_ARMENIAN, - CLASS_SCRIPT_AVESTAN, - CLASS_SCRIPT_BALINESE, - CLASS_SCRIPT_BAMUM, - CLASS_SCRIPT_BATAK, - CLASS_SCRIPT_BENGALI, - CLASS_SCRIPT_BOPOMOFO, - CLASS_SCRIPT_BRAHMI, - CLASS_SCRIPT_BRAILLE, - CLASS_SCRIPT_BUGINESE, - CLASS_SCRIPT_BUHID, - CLASS_SCRIPT_CANADIAN_ABORIGINAL, - CLASS_SCRIPT_CARIAN, - CLASS_SCRIPT_CHAM, - CLASS_SCRIPT_CHEROKEE, - CLASS_SCRIPT_COMMON, - CLASS_SCRIPT_COPTIC, - CLASS_SCRIPT_CUNEIFORM, - CLASS_SCRIPT_CYPRIOT, - CLASS_SCRIPT_CYRILLIC, - CLASS_SCRIPT_DESERET, - CLASS_SCRIPT_DEVANAGARI, - CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, - CLASS_SCRIPT_ETHIOPIC, - CLASS_SCRIPT_GEORGIAN, - CLASS_SCRIPT_GLAGOLITIC, - CLASS_SCRIPT_GOTHIC, - CLASS_SCRIPT_GREEK, - CLASS_SCRIPT_GUJARATI, - CLASS_SCRIPT_GURMUKHI, - CLASS_SCRIPT_HAN, - CLASS_SCRIPT_HANGUL, - CLASS_SCRIPT_HANUNOO, - CLASS_SCRIPT_HEBREW, - CLASS_SCRIPT_HIRAGANA, - CLASS_SCRIPT_IMPERIAL_ARAMAIC, - CLASS_SCRIPT_INHERITED, - CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, - CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, - CLASS_SCRIPT_JAVANESE, - CLASS_SCRIPT_KAITHI, - CLASS_SCRIPT_KANNADA, - CLASS_SCRIPT_KATAKANA, - CLASS_SCRIPT_KAYAH_LI, - CLASS_SCRIPT_KHAROSHTHI, - CLASS_SCRIPT_KHMER, - CLASS_SCRIPT_LAO, - CLASS_SCRIPT_LATIN, - CLASS_SCRIPT_LEPCHA, - CLASS_SCRIPT_LIMBU, - CLASS_SCRIPT_LINEAR_B, - CLASS_SCRIPT_LISU, - CLASS_SCRIPT_LYCIAN, - CLASS_SCRIPT_LYDIAN, - CLASS_SCRIPT_MALAYALAM, - CLASS_SCRIPT_MANDAIC, - CLASS_SCRIPT_MEETEI_MAYEK, - CLASS_SCRIPT_MONGOLIAN, - CLASS_SCRIPT_MYANMAR, - CLASS_SCRIPT_NEW_TAI_LUE, - CLASS_SCRIPT_NKO, - CLASS_SCRIPT_OGHAM, - CLASS_SCRIPT_OL_CHIKI, - CLASS_SCRIPT_OLD_ITALIC, - CLASS_SCRIPT_OLD_PERSIAN, - CLASS_SCRIPT_OLD_SOUTH_ARABIAN, - CLASS_SCRIPT_OLD_TURKIC, - CLASS_SCRIPT_ORIYA, - CLASS_SCRIPT_OSMANYA, - CLASS_SCRIPT_PHAGS_PA, - CLASS_SCRIPT_PHOENICIAN, - CLASS_SCRIPT_REJANG, - CLASS_SCRIPT_RUNIC, - CLASS_SCRIPT_SAMARITAN, - CLASS_SCRIPT_SAURASHTRA, - CLASS_SCRIPT_SHAVIAN, - CLASS_SCRIPT_SINHALA, - CLASS_SCRIPT_SUNDANESE, - CLASS_SCRIPT_SYLOTI_NAGRI, - CLASS_SCRIPT_SYRIAC, - CLASS_SCRIPT_TAGALOG, - CLASS_SCRIPT_TAGBANWA, - CLASS_SCRIPT_TAI_LE, - CLASS_SCRIPT_TAI_THAM, - CLASS_SCRIPT_TAI_VIET, - CLASS_SCRIPT_TAMIL, - CLASS_SCRIPT_TELUGU, - CLASS_SCRIPT_THAANA, - CLASS_SCRIPT_THAI, - CLASS_SCRIPT_TIBETAN, - CLASS_SCRIPT_TIFINAGH, - CLASS_SCRIPT_UGARITIC, - CLASS_SCRIPT_VAI, - CLASS_SCRIPT_YI, - CLASS_UCP_ANY -}; - -CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode); - -class ComponentClass; -class NFABuilder; - -/* Caller is responsible for lifecycle management, class finalized */ -std::unique_ptr<ComponentClass> -generateComponent(PredefinedClass c, bool negated, const ParseMode &mode); - -/* Caller is responsible for lifecycle management, class open */ -std::unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode); - -/** Common case: generate a component for a single literal character, possibly - * in caseless mode. Caller is responsible for lifecycle management. */ -std::unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c, - bool nocase); - -class ComponentClass : public Component { - friend class DumpVisitor; -protected: - explicit ComponentClass(const ParseMode &mode_in); -public: - ~ComponentClass() override; - ComponentClass *clone() const override = 0; - - Component *accept(ComponentVisitor &v) override = 0; - void accept(ConstComponentVisitor &v) const override = 0; - - /** \brief True if the class contains no members (i.e. it will not match - * against anything). This function can only be called on a finalized - * class. - * - * Note: This is a different concept to Component::empty. - */ - virtual bool class_empty(void) const = 0; - - virtual void add(PredefinedClass c, bool negated) = 0; - virtual void add(unichar c) = 0; /* may throw LocatedParseError */ - void addDash(void); - - void negate(void); - virtual void finalize(void) = 0; - - bool isNegated() const { return m_negate; } - - std::vector<PositionInfo> first() const override = 0; - std::vector<PositionInfo> last() const override = 0; - bool empty() const override { return false; } /* always 1 codepoint wide */ - - void notePositions(GlushkovBuildState &bs) override = 0; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &) override = 0; - -protected: - bool m_negate; - const ParseMode mode; - bool in_cand_range; - unichar range_start; - bool finalized; - - virtual void createRange(unichar) = 0; - - // Protected copy ctor. Use clone instead. - ComponentClass(const ComponentClass &other) - : Component(other), m_negate(other.m_negate), mode(other.mode), - in_cand_range(other.in_cand_range), range_start(other.range_start), - finalized(other.finalized) {} -}; - -} // namespace ue2 - -#endif // COMPONENTCLASS_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character classes and their mnemonics. + */ + +#ifndef COMPONENTCLASS_H +#define COMPONENTCLASS_H + +#include <string> +#include <vector> +#include <utility> + +#include "Component.h" +#include "Parser.h" +#include "util/charreach.h" +#include "util/unicode_def.h" +#include "ue2common.h" + +namespace ue2 { + +enum PredefinedClass { + CLASS_ALNUM, + CLASS_ALPHA, + CLASS_ANY, /* dot, not quite any when not in dotall mode */ + CLASS_ASCII, + CLASS_BLANK, + CLASS_CNTRL, + CLASS_DIGIT, + CLASS_GRAPH, + CLASS_HORZ, + CLASS_LOWER, + CLASS_PRINT, + CLASS_PUNCT, + CLASS_SPACE, /* has vertical tab */ + CLASS_UPPER, + CLASS_VERT, + CLASS_WORD, + CLASS_XDIGIT, + CLASS_XGRAPH, /* [:graph:] in UCP mode */ + CLASS_XPRINT, /* [:print:] in UCP mode */ + CLASS_XPUNCT, /* [:punct:] in UCP mode */ + CLASS_UCP_C, + CLASS_UCP_CC, + CLASS_UCP_CF, + CLASS_UCP_CN, /* unallocated code points */ + CLASS_UCP_CO, + CLASS_UCP_CS, /* does not contain valid unicode codepoints */ + CLASS_UCP_L, + CLASS_UCP_LL, + CLASS_UCP_LM, + CLASS_UCP_LO, + CLASS_UCP_LT, + CLASS_UCP_LU, + CLASS_UCP_L_AND, /* L& = LL+LU+LT */ + CLASS_UCP_M, + CLASS_UCP_MC, + CLASS_UCP_ME, + CLASS_UCP_MN, + CLASS_UCP_N, + CLASS_UCP_ND, + CLASS_UCP_NL, + CLASS_UCP_NO, + CLASS_UCP_P, + CLASS_UCP_PC, + CLASS_UCP_PD, + CLASS_UCP_PE, + CLASS_UCP_PF, + CLASS_UCP_PI, + CLASS_UCP_PO, + CLASS_UCP_PS, + CLASS_UCP_S, + CLASS_UCP_SC, + CLASS_UCP_SK, + CLASS_UCP_SM, + CLASS_UCP_SO, + CLASS_UCP_Z, + CLASS_UCP_ZL, + CLASS_UCP_ZP, + CLASS_UCP_ZS, + CLASS_UCP_XAN, + CLASS_UCP_XPS, /* CLASS_SPACE */ + CLASS_UCP_XSP, + CLASS_UCP_XWD, + CLASS_SCRIPT_ARABIC, + CLASS_SCRIPT_ARMENIAN, + CLASS_SCRIPT_AVESTAN, + CLASS_SCRIPT_BALINESE, + CLASS_SCRIPT_BAMUM, + CLASS_SCRIPT_BATAK, + CLASS_SCRIPT_BENGALI, + CLASS_SCRIPT_BOPOMOFO, + CLASS_SCRIPT_BRAHMI, + CLASS_SCRIPT_BRAILLE, + CLASS_SCRIPT_BUGINESE, + CLASS_SCRIPT_BUHID, + CLASS_SCRIPT_CANADIAN_ABORIGINAL, + CLASS_SCRIPT_CARIAN, + CLASS_SCRIPT_CHAM, + CLASS_SCRIPT_CHEROKEE, + CLASS_SCRIPT_COMMON, + CLASS_SCRIPT_COPTIC, + CLASS_SCRIPT_CUNEIFORM, + CLASS_SCRIPT_CYPRIOT, + CLASS_SCRIPT_CYRILLIC, + CLASS_SCRIPT_DESERET, + CLASS_SCRIPT_DEVANAGARI, + CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, + CLASS_SCRIPT_ETHIOPIC, + CLASS_SCRIPT_GEORGIAN, + CLASS_SCRIPT_GLAGOLITIC, + CLASS_SCRIPT_GOTHIC, + CLASS_SCRIPT_GREEK, + CLASS_SCRIPT_GUJARATI, + CLASS_SCRIPT_GURMUKHI, + CLASS_SCRIPT_HAN, + CLASS_SCRIPT_HANGUL, + CLASS_SCRIPT_HANUNOO, + CLASS_SCRIPT_HEBREW, + CLASS_SCRIPT_HIRAGANA, + CLASS_SCRIPT_IMPERIAL_ARAMAIC, + CLASS_SCRIPT_INHERITED, + CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, + CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, + CLASS_SCRIPT_JAVANESE, + CLASS_SCRIPT_KAITHI, + CLASS_SCRIPT_KANNADA, + CLASS_SCRIPT_KATAKANA, + CLASS_SCRIPT_KAYAH_LI, + CLASS_SCRIPT_KHAROSHTHI, + CLASS_SCRIPT_KHMER, + CLASS_SCRIPT_LAO, + CLASS_SCRIPT_LATIN, + CLASS_SCRIPT_LEPCHA, + CLASS_SCRIPT_LIMBU, + CLASS_SCRIPT_LINEAR_B, + CLASS_SCRIPT_LISU, + CLASS_SCRIPT_LYCIAN, + CLASS_SCRIPT_LYDIAN, + CLASS_SCRIPT_MALAYALAM, + CLASS_SCRIPT_MANDAIC, + CLASS_SCRIPT_MEETEI_MAYEK, + CLASS_SCRIPT_MONGOLIAN, + CLASS_SCRIPT_MYANMAR, + CLASS_SCRIPT_NEW_TAI_LUE, + CLASS_SCRIPT_NKO, + CLASS_SCRIPT_OGHAM, + CLASS_SCRIPT_OL_CHIKI, + CLASS_SCRIPT_OLD_ITALIC, + CLASS_SCRIPT_OLD_PERSIAN, + CLASS_SCRIPT_OLD_SOUTH_ARABIAN, + CLASS_SCRIPT_OLD_TURKIC, + CLASS_SCRIPT_ORIYA, + CLASS_SCRIPT_OSMANYA, + CLASS_SCRIPT_PHAGS_PA, + CLASS_SCRIPT_PHOENICIAN, + CLASS_SCRIPT_REJANG, + CLASS_SCRIPT_RUNIC, + CLASS_SCRIPT_SAMARITAN, + CLASS_SCRIPT_SAURASHTRA, + CLASS_SCRIPT_SHAVIAN, + CLASS_SCRIPT_SINHALA, + CLASS_SCRIPT_SUNDANESE, + CLASS_SCRIPT_SYLOTI_NAGRI, + CLASS_SCRIPT_SYRIAC, + CLASS_SCRIPT_TAGALOG, + CLASS_SCRIPT_TAGBANWA, + CLASS_SCRIPT_TAI_LE, + CLASS_SCRIPT_TAI_THAM, + CLASS_SCRIPT_TAI_VIET, + CLASS_SCRIPT_TAMIL, + CLASS_SCRIPT_TELUGU, + CLASS_SCRIPT_THAANA, + CLASS_SCRIPT_THAI, + CLASS_SCRIPT_TIBETAN, + CLASS_SCRIPT_TIFINAGH, + CLASS_SCRIPT_UGARITIC, + CLASS_SCRIPT_VAI, + CLASS_SCRIPT_YI, + CLASS_UCP_ANY +}; + +CharReach getPredefinedCharReach(PredefinedClass c, const ParseMode &mode); + +class ComponentClass; +class NFABuilder; + +/* Caller is responsible for lifecycle management, class finalized */ +std::unique_ptr<ComponentClass> +generateComponent(PredefinedClass c, bool negated, const ParseMode &mode); + +/* Caller is responsible for lifecycle management, class open */ +std::unique_ptr<ComponentClass> getComponentClass(const ParseMode &mode); + +/** Common case: generate a component for a single literal character, possibly + * in caseless mode. Caller is responsible for lifecycle management. */ +std::unique_ptr<ComponentClass> getLiteralComponentClass(unsigned char c, + bool nocase); + +class ComponentClass : public Component { + friend class DumpVisitor; +protected: + explicit ComponentClass(const ParseMode &mode_in); +public: + ~ComponentClass() override; + ComponentClass *clone() const override = 0; + + Component *accept(ComponentVisitor &v) override = 0; + void accept(ConstComponentVisitor &v) const override = 0; + + /** \brief True if the class contains no members (i.e. it will not match + * against anything). This function can only be called on a finalized + * class. + * + * Note: This is a different concept to Component::empty. + */ + virtual bool class_empty(void) const = 0; + + virtual void add(PredefinedClass c, bool negated) = 0; + virtual void add(unichar c) = 0; /* may throw LocatedParseError */ + void addDash(void); + + void negate(void); + virtual void finalize(void) = 0; + + bool isNegated() const { return m_negate; } + + std::vector<PositionInfo> first() const override = 0; + std::vector<PositionInfo> last() const override = 0; + bool empty() const override { return false; } /* always 1 codepoint wide */ + + void notePositions(GlushkovBuildState &bs) override = 0; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &) override = 0; + +protected: + bool m_negate; + const ParseMode mode; + bool in_cand_range; + unichar range_start; + bool finalized; + + virtual void createRange(unichar) = 0; + + // Protected copy ctor. Use clone instead. + ComponentClass(const ComponentClass &other) + : Component(other), m_negate(other.m_negate), mode(other.mode), + in_cand_range(other.in_cand_range), range_start(other.range_start), + finalized(other.finalized) {} +}; + +} // namespace ue2 + +#endif // COMPONENTCLASS_H diff --git a/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp b/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp index d4fbb8711c..2a2ed4e093 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentCondReference.cpp @@ -1,166 +1,166 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Conditional reference. - */ -#include "ComponentCondReference.h" -#include "ComponentAlternation.h" -#include "ComponentAssertion.h" -#include "parse_error.h" -#include "position_info.h" - -#include <algorithm> -#include <cassert> -#include <memory> - -using namespace std; - -namespace ue2 { - -ComponentCondReference::ComponentCondReference(unsigned ref) - : kind(CONDITION_NUMBER), ref_id(ref), hasBothBranches(false) {} - -ComponentCondReference::ComponentCondReference(const string &name) - : kind(CONDITION_NAME), ref_id(0), ref_name(name), hasBothBranches(false) {} - -ComponentCondReference::ComponentCondReference(unique_ptr<Component> c) - : kind(CONDITION_ASSERTION), ref_id(0), assertion(move(c)), - hasBothBranches(false) {} - -ComponentCondReference::~ComponentCondReference() {} - -ComponentCondReference::ComponentCondReference( - const ComponentCondReference &other) - : ComponentSequence(other), kind(other.kind), ref_id(other.ref_id), - ref_name(other.ref_name), hasBothBranches(other.hasBothBranches) { - if (kind == CONDITION_ASSERTION) { - assert(other.assertion); - assertion.reset(other.assertion->clone()); - } else { - assert(!other.assertion); - } -} - -ComponentCondReference *ComponentCondReference::clone() const { - return new ComponentCondReference(*this); -} - -Component *ComponentCondReference::accept(ComponentVisitor &v) { - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - if (kind == CONDITION_ASSERTION) { - Component *a = assertion.get(); - c = assertion->accept(v); - if (c != a) { - assertion.reset(c); - } - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); - c = (*i)->accept(v); - if (c != child) { - // Child has been replaced (new Component pointer) or we've been - // instructed to delete it (null). - i->reset(c); - } - } - - // Remove deleted children. - children.erase(remove(children.begin(), children.end(), nullptr), - children.end()); - - v.post(this); - return this; -} - -void ComponentCondReference::accept(ConstComponentVisitor &v) const { - v.pre(*this); - - if (kind == CONDITION_ASSERTION) { - assertion->accept(v); - v.during(*this); // FIXME: a good idea? - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - (*i)->accept(v); - if (i + 1 != e) { - v.during(*this); - } - } - - v.post(*this); -} - -void ComponentCondReference::addAlternation() { - if (alternation) { - if (ref_name == "DEFINE") { - throw LocatedParseError("DEFINE conditional group with more than " - "one branch"); - } - - if (alternation->numBranches() >= 2) { - throw LocatedParseError("Conditional with more than two branches"); - } - } - hasBothBranches = true; - ComponentSequence::addAlternation(); -} - -vector<PositionInfo> ComponentCondReference::first() const { - assert(0); - return vector<PositionInfo>(); -} - -vector<PositionInfo> ComponentCondReference::last() const { - assert(0); - return vector<PositionInfo>(); -} - -bool ComponentCondReference::empty() const { return true; } - -void ComponentCondReference::notePositions(GlushkovBuildState &) { assert(0); } - -void ComponentCondReference::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - assert(0); -} - -bool ComponentCondReference::repeatable() const { - // If this assertion has no children (it's an empty sequence, like that - // produced by '(?!)') then PCRE would throw a "nothing to repeat" error. - // So we do as well. - return !children.empty(); -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Conditional reference. + */ +#include "ComponentCondReference.h" +#include "ComponentAlternation.h" +#include "ComponentAssertion.h" +#include "parse_error.h" +#include "position_info.h" + +#include <algorithm> +#include <cassert> +#include <memory> + +using namespace std; + +namespace ue2 { + +ComponentCondReference::ComponentCondReference(unsigned ref) + : kind(CONDITION_NUMBER), ref_id(ref), hasBothBranches(false) {} + +ComponentCondReference::ComponentCondReference(const string &name) + : kind(CONDITION_NAME), ref_id(0), ref_name(name), hasBothBranches(false) {} + +ComponentCondReference::ComponentCondReference(unique_ptr<Component> c) + : kind(CONDITION_ASSERTION), ref_id(0), assertion(move(c)), + hasBothBranches(false) {} + +ComponentCondReference::~ComponentCondReference() {} + +ComponentCondReference::ComponentCondReference( + const ComponentCondReference &other) + : ComponentSequence(other), kind(other.kind), ref_id(other.ref_id), + ref_name(other.ref_name), hasBothBranches(other.hasBothBranches) { + if (kind == CONDITION_ASSERTION) { + assert(other.assertion); + assertion.reset(other.assertion->clone()); + } else { + assert(!other.assertion); + } +} + +ComponentCondReference *ComponentCondReference::clone() const { + return new ComponentCondReference(*this); +} + +Component *ComponentCondReference::accept(ComponentVisitor &v) { + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + if (kind == CONDITION_ASSERTION) { + Component *a = assertion.get(); + c = assertion->accept(v); + if (c != a) { + assertion.reset(c); + } + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + Component *child = i->get(); + c = (*i)->accept(v); + if (c != child) { + // Child has been replaced (new Component pointer) or we've been + // instructed to delete it (null). + i->reset(c); + } + } + + // Remove deleted children. + children.erase(remove(children.begin(), children.end(), nullptr), + children.end()); + + v.post(this); + return this; +} + +void ComponentCondReference::accept(ConstComponentVisitor &v) const { + v.pre(*this); + + if (kind == CONDITION_ASSERTION) { + assertion->accept(v); + v.during(*this); // FIXME: a good idea? + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + (*i)->accept(v); + if (i + 1 != e) { + v.during(*this); + } + } + + v.post(*this); +} + +void ComponentCondReference::addAlternation() { + if (alternation) { + if (ref_name == "DEFINE") { + throw LocatedParseError("DEFINE conditional group with more than " + "one branch"); + } + + if (alternation->numBranches() >= 2) { + throw LocatedParseError("Conditional with more than two branches"); + } + } + hasBothBranches = true; + ComponentSequence::addAlternation(); +} + +vector<PositionInfo> ComponentCondReference::first() const { + assert(0); + return vector<PositionInfo>(); +} + +vector<PositionInfo> ComponentCondReference::last() const { + assert(0); + return vector<PositionInfo>(); +} + +bool ComponentCondReference::empty() const { return true; } + +void ComponentCondReference::notePositions(GlushkovBuildState &) { assert(0); } + +void ComponentCondReference::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + assert(0); +} + +bool ComponentCondReference::repeatable() const { + // If this assertion has no children (it's an empty sequence, like that + // produced by '(?!)') then PCRE would throw a "nothing to repeat" error. + // So we do as well. + return !children.empty(); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h index bcdb87382b..c0ee9ac3ac 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h +++ b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h @@ -1,91 +1,91 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Conditional reference. - */ - -#ifndef PARSER_COMPONENTCONDREFERENCE_H_ -#define PARSER_COMPONENTCONDREFERENCE_H_ - -#include "ComponentSequence.h" - -#include <memory> -#include <string> - -namespace ue2 { - -class ComponentCondReference : public ComponentSequence { - friend class DumpVisitor; - friend class PrefilterVisitor; - friend class ReferenceVisitor; - friend class PrintVisitor; -public: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Conditional reference. + */ + +#ifndef PARSER_COMPONENTCONDREFERENCE_H_ +#define PARSER_COMPONENTCONDREFERENCE_H_ + +#include "ComponentSequence.h" + +#include <memory> +#include <string> + +namespace ue2 { + +class ComponentCondReference : public ComponentSequence { + friend class DumpVisitor; + friend class PrefilterVisitor; + friend class ReferenceVisitor; + friend class PrintVisitor; +public: explicit ComponentCondReference(unsigned ref); explicit ComponentCondReference(const std::string &name); explicit ComponentCondReference(std::unique_ptr<Component> c); - - ~ComponentCondReference() override; - ComponentCondReference *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - void addAlternation() override; - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - - bool empty() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool repeatable() const override; - -private: - ComponentCondReference(const ComponentCondReference &other); - - enum Condition { - CONDITION_NUMBER, - CONDITION_NAME, - CONDITION_ASSERTION - }; - - enum Condition kind; - - unsigned ref_id; - std::string ref_name; - std::unique_ptr<Component> assertion; - - /** True if an alternation has been added, which means we have both a YES - * and a NO branch. */ - bool hasBothBranches; -}; - -} // namespace ue2 - -#endif // PARSER_COMPONENTCONDREFERENCE_H_ + + ~ComponentCondReference() override; + ComponentCondReference *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + void addAlternation() override; + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + + bool empty() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool repeatable() const override; + +private: + ComponentCondReference(const ComponentCondReference &other); + + enum Condition { + CONDITION_NUMBER, + CONDITION_NAME, + CONDITION_ASSERTION + }; + + enum Condition kind; + + unsigned ref_id; + std::string ref_name; + std::unique_ptr<Component> assertion; + + /** True if an alternation has been added, which means we have both a YES + * and a NO branch. */ + bool hasBothBranches; +}; + +} // namespace ue2 + +#endif // PARSER_COMPONENTCONDREFERENCE_H_ diff --git a/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp b/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp index 86c762de7d..27f30d7eb7 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentEUS.cpp @@ -1,75 +1,75 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Extended Unicode sequences (\\X) - */ - - -#include "ComponentEUS.h" - -#include "buildstate.h" -#include "position.h" -#include "position_info.h" -#include "Parser.h" -#include "nfagraph/ng_builder.h" -#include "util/charreach.h" - -using namespace std; - -namespace ue2 { - -ComponentEUS::ComponentEUS(u32 loc_in, const ParseMode &mode) - : loc(loc_in), utf8(mode.utf8), - position(GlushkovBuildState::POS_UNINITIALIZED) {} - -ComponentEUS::~ComponentEUS() {} - -ComponentEUS * ComponentEUS::clone() const { - return new ComponentEUS(*this); -} - -vector<PositionInfo> ComponentEUS::first() const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -vector<PositionInfo> ComponentEUS::last() const { - return vector<PositionInfo>(1, PositionInfo(position)); -} - -void ComponentEUS::notePositions(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - position = builder.makePositions(1); - builder.addCharReach(position, CharReach::dot()); - builder.setNodeReportID(position, 0 /* offset adj */); - if (utf8) { /* we are prefiltering, turn to.+ */ - builder.addEdge(position, position); - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Extended Unicode sequences (\\X) + */ + + +#include "ComponentEUS.h" + +#include "buildstate.h" +#include "position.h" +#include "position_info.h" +#include "Parser.h" +#include "nfagraph/ng_builder.h" +#include "util/charreach.h" + +using namespace std; + +namespace ue2 { + +ComponentEUS::ComponentEUS(u32 loc_in, const ParseMode &mode) + : loc(loc_in), utf8(mode.utf8), + position(GlushkovBuildState::POS_UNINITIALIZED) {} + +ComponentEUS::~ComponentEUS() {} + +ComponentEUS * ComponentEUS::clone() const { + return new ComponentEUS(*this); +} + +vector<PositionInfo> ComponentEUS::first() const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +vector<PositionInfo> ComponentEUS::last() const { + return vector<PositionInfo>(1, PositionInfo(position)); +} + +void ComponentEUS::notePositions(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + position = builder.makePositions(1); + builder.addCharReach(position, CharReach::dot()); + builder.setNodeReportID(position, 0 /* offset adj */); + if (utf8) { /* we are prefiltering, turn to.+ */ + builder.addEdge(position, position); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentEUS.h b/contrib/libs/hyperscan/src/parser/ComponentEUS.h index 3f8d920625..a71922de9e 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentEUS.h +++ b/contrib/libs/hyperscan/src/parser/ComponentEUS.h @@ -1,86 +1,86 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Extended Unicode sequences (\\X) - */ - -#ifndef _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_ -#define _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_ - -#include "Component.h" - -namespace ue2 { - -struct ParseMode; - -class ComponentEUS : public Component { - friend class DumpVisitor; - friend class UnsupportedVisitor; -public: - ComponentEUS(u32 loc, const ParseMode &mode); - ~ComponentEUS() override; - ComponentEUS *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - - bool empty() const override { return false; } - - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &, - const std::vector<PositionInfo> &) override { - // all follow set construction is handled by firsts/lasts - return; - } - -private: - u32 loc; - bool utf8; - Position position; - - ComponentEUS(const ComponentEUS &other) - : Component(other), loc(other.loc), utf8(other.utf8), - position(other.position) {} -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Extended Unicode sequences (\\X) + */ + +#ifndef _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_ +#define _RE_COMPONENTEXTENDEDUNICODESEQUENCE_H_ + +#include "Component.h" + +namespace ue2 { + +struct ParseMode; + +class ComponentEUS : public Component { + friend class DumpVisitor; + friend class UnsupportedVisitor; +public: + ComponentEUS(u32 loc, const ParseMode &mode); + ~ComponentEUS() override; + ComponentEUS *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + + bool empty() const override { return false; } + + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &, + const std::vector<PositionInfo> &) override { + // all follow set construction is handled by firsts/lasts + return; + } + +private: + u32 loc; + bool utf8; + Position position; + + ComponentEUS(const ComponentEUS &other) + : Component(other), loc(other.loc), utf8(other.utf8), + position(other.position) {} +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp b/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp index f650d38a46..8664450b89 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentEmpty.cpp @@ -1,93 +1,93 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Represents an empty regex element, like (?m) - */ -#include <cassert> - -#include "ComponentEmpty.h" -#include "position.h" -#include "position_info.h" -#include "buildstate.h" -#include "ue2common.h" - -using namespace std; - -namespace ue2 { - -ComponentEmpty::ComponentEmpty() { - // Surprise, it's EMPTY! -} - -ComponentEmpty::~ComponentEmpty() { - // Surprise, it's EMPTY! -} - -ComponentEmpty *ComponentEmpty::clone() const { return new ComponentEmpty(); } - -bool ComponentEmpty::empty() const { - return true; -} - -bool ComponentEmpty::vacuous_everywhere(void) const { - return true; -} - -bool ComponentEmpty::repeatable() const { - // This is the whole point of this class. Empty constructs like '(?m)' are - // not repeatable. - return false; -} - -vector<PositionInfo> ComponentEmpty::first() const { - return vector<PositionInfo>(1, GlushkovBuildState::POS_EPSILON); -} - -vector<PositionInfo> ComponentEmpty::last() const { - return vector<PositionInfo>(); -} - -void ComponentEmpty::notePositions(GlushkovBuildState &) { - // Nothing to do. -} - -void ComponentEmpty::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - // Nothing to do. -} - -bool ComponentEmpty::checkEmbeddedStartAnchor(bool at_start) const { - return at_start; -} - -bool ComponentEmpty::checkEmbeddedEndAnchor(bool at_end) const { - return at_end; -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Represents an empty regex element, like (?m) + */ +#include <cassert> + +#include "ComponentEmpty.h" +#include "position.h" +#include "position_info.h" +#include "buildstate.h" +#include "ue2common.h" + +using namespace std; + +namespace ue2 { + +ComponentEmpty::ComponentEmpty() { + // Surprise, it's EMPTY! +} + +ComponentEmpty::~ComponentEmpty() { + // Surprise, it's EMPTY! +} + +ComponentEmpty *ComponentEmpty::clone() const { return new ComponentEmpty(); } + +bool ComponentEmpty::empty() const { + return true; +} + +bool ComponentEmpty::vacuous_everywhere(void) const { + return true; +} + +bool ComponentEmpty::repeatable() const { + // This is the whole point of this class. Empty constructs like '(?m)' are + // not repeatable. + return false; +} + +vector<PositionInfo> ComponentEmpty::first() const { + return vector<PositionInfo>(1, GlushkovBuildState::POS_EPSILON); +} + +vector<PositionInfo> ComponentEmpty::last() const { + return vector<PositionInfo>(); +} + +void ComponentEmpty::notePositions(GlushkovBuildState &) { + // Nothing to do. +} + +void ComponentEmpty::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + // Nothing to do. +} + +bool ComponentEmpty::checkEmbeddedStartAnchor(bool at_start) const { + return at_start; +} + +bool ComponentEmpty::checkEmbeddedEndAnchor(bool at_end) const { + return at_end; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentEmpty.h b/contrib/libs/hyperscan/src/parser/ComponentEmpty.h index db3bcafaab..17564c6d62 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentEmpty.h +++ b/contrib/libs/hyperscan/src/parser/ComponentEmpty.h @@ -1,75 +1,75 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Represents an empty regex element, like (?m) - */ - -#ifndef PARSER_COMPONENT_EMPTY_H_ -#define PARSER_COMPONENT_EMPTY_H_ - -#include "Component.h" - -namespace ue2 { - -class ComponentEmpty : public Component { - friend class DumpVisitor; -public: - ComponentEmpty(); - ~ComponentEmpty() override; - ComponentEmpty *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty() const override; - bool vacuous_everywhere() const override; - bool repeatable() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - - bool checkEmbeddedStartAnchor(bool at_start) const override; - bool checkEmbeddedEndAnchor(bool at_end) const override; - -}; - -} // namespace ue2 - -#endif // PARSER_COMPONENT_EMPTY_H_ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Represents an empty regex element, like (?m) + */ + +#ifndef PARSER_COMPONENT_EMPTY_H_ +#define PARSER_COMPONENT_EMPTY_H_ + +#include "Component.h" + +namespace ue2 { + +class ComponentEmpty : public Component { + friend class DumpVisitor; +public: + ComponentEmpty(); + ~ComponentEmpty() override; + ComponentEmpty *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty() const override; + bool vacuous_everywhere() const override; + bool repeatable() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + + bool checkEmbeddedStartAnchor(bool at_start) const override; + bool checkEmbeddedEndAnchor(bool at_end) const override; + +}; + +} // namespace ue2 + +#endif // PARSER_COMPONENT_EMPTY_H_ diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp index 3d58c60eb9..09f59d05ec 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp @@ -1,188 +1,188 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Repeats ('*', '+', '?', '{M,N}', etc) - */ - - -#include "ComponentRepeat.h" - -#include "buildstate.h" -#include "nfagraph/ng_builder.h" -#include "parse_error.h" -#include "Parser.h" -#include "position.h" -#include "position_dump.h" -#include "position_info.h" -#include "ue2common.h" -#include "util/make_unique.h" - -#include <algorithm> -#include <cassert> - -using namespace std; - -namespace ue2 { - -/** \brief Hard limit on the maximum repeat for bounded repeats. */ -static constexpr u32 MAX_REPEAT = 32767; - -/** \brief If expanding a repeat would lead to this many positions being - * generated, we fail the pattern. */ -static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge - -/* no edge priorities means that if our subcomponent can be empty, our min - * extent is effectively zero. */ -ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min, - u32 max, enum RepeatType t) - : type(t), sub_comp(move(sub_comp_in)), m_min(min), m_max(max), - posFirst(GlushkovBuildState::POS_UNINITIALIZED), - posLast(GlushkovBuildState::POS_UNINITIALIZED) { - assert(sub_comp); - assert(max > 0); - assert(m_min <= m_max); - - if (m_min > MAX_REPEAT) { - throw ParseError("Bounded repeat is too large."); - } - if (m_max != NoLimit && m_max > MAX_REPEAT) { - throw ParseError("Bounded repeat is too large."); - } -} - -ComponentRepeat::~ComponentRepeat() {} - -ComponentRepeat *ComponentRepeat::clone() const { - return new ComponentRepeat(*this); -} - -ComponentRepeat::ComponentRepeat(const ComponentRepeat &other) - : Component(other), - type(other.type), sub_comp(unique_ptr<Component>(other.sub_comp->clone())), - m_min(other.m_min), m_max(other.m_max), - m_firsts(other.m_firsts), m_lasts(other.m_lasts), +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Repeats ('*', '+', '?', '{M,N}', etc) + */ + + +#include "ComponentRepeat.h" + +#include "buildstate.h" +#include "nfagraph/ng_builder.h" +#include "parse_error.h" +#include "Parser.h" +#include "position.h" +#include "position_dump.h" +#include "position_info.h" +#include "ue2common.h" +#include "util/make_unique.h" + +#include <algorithm> +#include <cassert> + +using namespace std; + +namespace ue2 { + +/** \brief Hard limit on the maximum repeat for bounded repeats. */ +static constexpr u32 MAX_REPEAT = 32767; + +/** \brief If expanding a repeat would lead to this many positions being + * generated, we fail the pattern. */ +static constexpr u32 MAX_POSITIONS_EXPANDED = 500000; // arbitrarily huge + +/* no edge priorities means that if our subcomponent can be empty, our min + * extent is effectively zero. */ +ComponentRepeat::ComponentRepeat(unique_ptr<Component> sub_comp_in, u32 min, + u32 max, enum RepeatType t) + : type(t), sub_comp(move(sub_comp_in)), m_min(min), m_max(max), + posFirst(GlushkovBuildState::POS_UNINITIALIZED), + posLast(GlushkovBuildState::POS_UNINITIALIZED) { + assert(sub_comp); + assert(max > 0); + assert(m_min <= m_max); + + if (m_min > MAX_REPEAT) { + throw ParseError("Bounded repeat is too large."); + } + if (m_max != NoLimit && m_max > MAX_REPEAT) { + throw ParseError("Bounded repeat is too large."); + } +} + +ComponentRepeat::~ComponentRepeat() {} + +ComponentRepeat *ComponentRepeat::clone() const { + return new ComponentRepeat(*this); +} + +ComponentRepeat::ComponentRepeat(const ComponentRepeat &other) + : Component(other), + type(other.type), sub_comp(unique_ptr<Component>(other.sub_comp->clone())), + m_min(other.m_min), m_max(other.m_max), + m_firsts(other.m_firsts), m_lasts(other.m_lasts), posFirst(other.posFirst), posLast(other.posLast) {} - -bool ComponentRepeat::empty() const { - return m_min == 0 || sub_comp->empty(); -} - -bool ComponentRepeat::repeatable() const { - return false; -} - -static -void addBase(Position base, vector<PositionInfo> &firsts, - vector<PositionInfo> &lasts) { - for (auto &e : firsts) { - if (e.pos != GlushkovBuildState::POS_EPSILON) { - e.pos += base; - } - } - for (auto &e : lasts) { - e.pos += base; - } -} - -static -void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) { - const NFABuilder& builder = bs.getBuilder(); - for (const auto &e : v) { - if (builder.isSpecialState(e.pos)) { - throw ParseError("Embedded anchors not supported."); - } - } -} - -void ComponentRepeat::notePositions(GlushkovBuildState &bs) { - assert(m_max > 0); - assert(m_max == NoLimit || m_max < MAX_REPEAT); - - /* Note: We can construct smaller subgraphs if we're not maintaining edge - * priorities. */ - - // We create one copy only through a recursive call to notePositions(), - // first() and last(). Then we clone its positions and store the - // appropriate firsts and lasts values for the copies. - posFirst = bs.getBuilder().numVertices(); - sub_comp->notePositions(bs); - - u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1); - DEBUG_PRINTF("building %u copies of repeated region\n", copies); - m_firsts.clear(); - m_lasts.clear(); - m_firsts.resize(copies); - m_lasts.resize(copies); - - m_firsts[0] = sub_comp->first(); - m_lasts[0] = sub_comp->last(); - - postSubNotePositionHook(); - - posLast = bs.getBuilder().numVertices() - 1; - u32 vcount = posLast + 1 - posFirst; - - // If we're making more than one copy, then our firsts and lasts must only - // contain vertices inside [posFirst, posLast]: anything else means we have - // an embedded anchor or otherwise weird situation. - if (copies > 1) { - checkPositions(m_firsts[0], bs); - checkPositions(m_lasts[0], bs); - } - - // Avoid enormous expansions - if (vcount * copies > MAX_POSITIONS_EXPANDED) { - throw ParseError("Bounded repeat is too large."); - } - - // Add positions for the rest of the copies - size_t copyPositions = vcount * (copies - 1); - bs.getBuilder().makePositions(copyPositions); - - // Calculate our firsts and lasts for the copies - for (u32 i = 1; i < copies; ++i) { - m_firsts[i] = m_firsts[0]; - m_lasts[i] = m_lasts[0]; - u32 base = i * vcount; - addBase(base, m_firsts[i], m_lasts[i]); - } - - recordPosBounds(posFirst, bs.getBuilder().numVertices()); + +bool ComponentRepeat::empty() const { + return m_min == 0 || sub_comp->empty(); +} + +bool ComponentRepeat::repeatable() const { + return false; +} + +static +void addBase(Position base, vector<PositionInfo> &firsts, + vector<PositionInfo> &lasts) { + for (auto &e : firsts) { + if (e.pos != GlushkovBuildState::POS_EPSILON) { + e.pos += base; + } + } + for (auto &e : lasts) { + e.pos += base; + } +} + +static +void checkPositions(vector<PositionInfo> &v, const GlushkovBuildState &bs) { + const NFABuilder& builder = bs.getBuilder(); + for (const auto &e : v) { + if (builder.isSpecialState(e.pos)) { + throw ParseError("Embedded anchors not supported."); + } + } +} + +void ComponentRepeat::notePositions(GlushkovBuildState &bs) { + assert(m_max > 0); + assert(m_max == NoLimit || m_max < MAX_REPEAT); + + /* Note: We can construct smaller subgraphs if we're not maintaining edge + * priorities. */ + + // We create one copy only through a recursive call to notePositions(), + // first() and last(). Then we clone its positions and store the + // appropriate firsts and lasts values for the copies. + posFirst = bs.getBuilder().numVertices(); + sub_comp->notePositions(bs); + + u32 copies = m_max < NoLimit ? m_max : MAX(m_min, 1); + DEBUG_PRINTF("building %u copies of repeated region\n", copies); + m_firsts.clear(); + m_lasts.clear(); + m_firsts.resize(copies); + m_lasts.resize(copies); + + m_firsts[0] = sub_comp->first(); + m_lasts[0] = sub_comp->last(); + + postSubNotePositionHook(); + + posLast = bs.getBuilder().numVertices() - 1; + u32 vcount = posLast + 1 - posFirst; + + // If we're making more than one copy, then our firsts and lasts must only + // contain vertices inside [posFirst, posLast]: anything else means we have + // an embedded anchor or otherwise weird situation. + if (copies > 1) { + checkPositions(m_firsts[0], bs); + checkPositions(m_lasts[0], bs); + } + + // Avoid enormous expansions + if (vcount * copies > MAX_POSITIONS_EXPANDED) { + throw ParseError("Bounded repeat is too large."); + } + + // Add positions for the rest of the copies + size_t copyPositions = vcount * (copies - 1); + bs.getBuilder().makePositions(copyPositions); + + // Calculate our firsts and lasts for the copies + for (u32 i = 1; i < copies; ++i) { + m_firsts[i] = m_firsts[0]; + m_lasts[i] = m_lasts[0]; + u32 base = i * vcount; + addBase(base, m_firsts[i], m_lasts[i]); + } + + recordPosBounds(posFirst, bs.getBuilder().numVertices()); // Each optional repeat has an epsilon at the end of its firsts list. for (u32 i = m_min; i < m_firsts.size(); i++) { m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); } -} - -vector<PositionInfo> ComponentRepeat::first() const { +} + +vector<PositionInfo> ComponentRepeat::first() const { if (!m_max) { return {}; } @@ -192,177 +192,177 @@ vector<PositionInfo> ComponentRepeat::first() const { DEBUG_PRINTF("firsts = %s\n", dumpPositions(begin(firsts), end(firsts)).c_str()); return firsts; -} - -void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, - const vector<PositionInfo> &lastPos) { - if (!m_max) { - return; - } - DEBUG_PRINTF("enter\n"); - - // Wire up the first (the "real") entry - - DEBUG_PRINTF("initial repeat\n"); - sub_comp->buildFollowSet(bs, lastPos); - - // Clone the subgraph we just added N times, where N is the minimum extent - // of the graph minus one, wiring them up in a linear sequence - - u32 copies = m_firsts.size(); - DEBUG_PRINTF("cloning %u copies of repeat\n", copies - 1); - for (u32 rep = 1; rep < copies; rep++) { - u32 offset = (posLast + 1 - posFirst) * rep; - if (offset > 0) { - bs.cloneFollowSet(posFirst, posLast, offset); - } - } - +} + +void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, + const vector<PositionInfo> &lastPos) { + if (!m_max) { + return; + } + DEBUG_PRINTF("enter\n"); + + // Wire up the first (the "real") entry + + DEBUG_PRINTF("initial repeat\n"); + sub_comp->buildFollowSet(bs, lastPos); + + // Clone the subgraph we just added N times, where N is the minimum extent + // of the graph minus one, wiring them up in a linear sequence + + u32 copies = m_firsts.size(); + DEBUG_PRINTF("cloning %u copies of repeat\n", copies - 1); + for (u32 rep = 1; rep < copies; rep++) { + u32 offset = (posLast + 1 - posFirst) * rep; + if (offset > 0) { + bs.cloneFollowSet(posFirst, posLast, offset); + } + } + wireRepeats(bs); - - DEBUG_PRINTF("leave\n"); -} - -void ComponentRepeat::optimise(bool connected_to_sds) { - DEBUG_PRINTF("opt %d\n", (int)connected_to_sds); - if (!connected_to_sds) { - return; - } - - DEBUG_PRINTF("setting m_max to %u\n", m_min); - m_max = m_min; -} - -bool ComponentRepeat::vacuous_everywhere() const { + + DEBUG_PRINTF("leave\n"); +} + +void ComponentRepeat::optimise(bool connected_to_sds) { + DEBUG_PRINTF("opt %d\n", (int)connected_to_sds); + if (!connected_to_sds) { + return; + } + + DEBUG_PRINTF("setting m_max to %u\n", m_min); + m_max = m_min; +} + +bool ComponentRepeat::vacuous_everywhere() const { return !m_min || sub_comp->vacuous_everywhere(); -} - -bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const { - at_start = sub_comp->checkEmbeddedStartAnchor(at_start); - - if (m_max > 1) { - at_start = sub_comp->checkEmbeddedStartAnchor(at_start); - } - - return at_start; -} - -bool ComponentRepeat::checkEmbeddedEndAnchor(bool at_end) const { - at_end = sub_comp->checkEmbeddedEndAnchor(at_end); - - if (m_max > 1) { - at_end = sub_comp->checkEmbeddedEndAnchor(at_end); - } - - return at_end; -} - -Component *ComponentRepeat::accept(ComponentVisitor &v) { - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - c = sub_comp->accept(v); - if (c != sub_comp.get()) { - sub_comp.reset(c); - } - - v.post(this); - return !sub_comp ? nullptr : this; -} - -void ComponentRepeat::accept(ConstComponentVisitor &v) const { - v.pre(*this); - sub_comp->accept(v); - v.post(*this); -} - -vector<PositionInfo> ComponentRepeat::last() const { - vector<PositionInfo> lasts; - if (!m_max) { - return lasts; - } - - assert(!m_firsts.empty()); // notePositions should already have run - assert(!m_lasts.empty()); - +} + +bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const { + at_start = sub_comp->checkEmbeddedStartAnchor(at_start); + + if (m_max > 1) { + at_start = sub_comp->checkEmbeddedStartAnchor(at_start); + } + + return at_start; +} + +bool ComponentRepeat::checkEmbeddedEndAnchor(bool at_end) const { + at_end = sub_comp->checkEmbeddedEndAnchor(at_end); + + if (m_max > 1) { + at_end = sub_comp->checkEmbeddedEndAnchor(at_end); + } + + return at_end; +} + +Component *ComponentRepeat::accept(ComponentVisitor &v) { + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + c = sub_comp->accept(v); + if (c != sub_comp.get()) { + sub_comp.reset(c); + } + + v.post(this); + return !sub_comp ? nullptr : this; +} + +void ComponentRepeat::accept(ConstComponentVisitor &v) const { + v.pre(*this); + sub_comp->accept(v); + v.post(*this); +} + +vector<PositionInfo> ComponentRepeat::last() const { + vector<PositionInfo> lasts; + if (!m_max) { + return lasts; + } + + assert(!m_firsts.empty()); // notePositions should already have run + assert(!m_lasts.empty()); + const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0]; lasts.insert(lasts.end(), l.begin(), l.end()); - if (!m_min || m_min != m_lasts.size()) { - lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); - } + if (!m_min || m_min != m_lasts.size()) { + lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); + } DEBUG_PRINTF("lasts = %s\n", dumpPositions(lasts.begin(), lasts.end()).c_str()); - return lasts; -} - + return lasts; +} + void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { - /* note: m_lasts[0] already valid */ - u32 copies = m_firsts.size(); - const bool isEmpty = sub_comp->empty(); + /* note: m_lasts[0] already valid */ + u32 copies = m_firsts.size(); + const bool isEmpty = sub_comp->empty(); const vector<PositionInfo> &optLasts = m_min ? m_lasts[m_min - 1] : m_lasts[0]; - - if (!copies) { - goto inf_check; - } - - DEBUG_PRINTF("wiring up %u mand repeats\n", m_min); - for (u32 rep = 1; rep < m_min; rep++) { - bs.connectRegions(m_lasts[rep - 1], m_firsts[rep]); - - if (isEmpty) { - m_lasts[rep].insert(m_lasts[rep].end(), m_lasts[rep - 1].begin(), - m_lasts[rep - 1].end()); - } - } - - DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); - for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { - vector<PositionInfo> lasts = m_lasts[rep - 1]; + + if (!copies) { + goto inf_check; + } + + DEBUG_PRINTF("wiring up %u mand repeats\n", m_min); + for (u32 rep = 1; rep < m_min; rep++) { + bs.connectRegions(m_lasts[rep - 1], m_firsts[rep]); + + if (isEmpty) { + m_lasts[rep].insert(m_lasts[rep].end(), m_lasts[rep - 1].begin(), + m_lasts[rep - 1].end()); + } + } + + DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); + for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { + vector<PositionInfo> lasts = m_lasts[rep - 1]; if (rep != m_min) { - lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); - sort(lasts.begin(), lasts.end()); - lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); - } - bs.connectRegions(lasts, m_firsts[rep]); - } - -inf_check: - // If we have no max bound, we need a self-loop as well. - if (m_max == NoLimit) { - DEBUG_PRINTF("final repeat self-loop\n"); - bs.connectRegions(m_lasts.back(), m_firsts.back()); - } -} - -static -bool hasPositionFlags(const Component &c) { - for (const auto &e : c.first()) { - if (e.flags) { - return true; - } - } - return false; -} - -void ComponentRepeat::postSubNotePositionHook() { - // UE-444 optimization: we can REWRITE m_min under various circumstances, - // so that we create smaller NFA graphs. Note that this is _not_ possible - // if our subcomponent contains a flagged position, e.g. nofloat. - if (!hasPositionFlags(*sub_comp) && sub_comp->empty()) { - m_min = 0; - } -} - -unique_ptr<ComponentRepeat> makeComponentRepeat(unique_ptr<Component> sub_comp, - u32 min, u32 max, - ComponentRepeat::RepeatType t) { - return ue2::make_unique<ComponentRepeat>(move(sub_comp), min, max, t); -} - -} // namespace ue2 + lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); + sort(lasts.begin(), lasts.end()); + lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); + } + bs.connectRegions(lasts, m_firsts[rep]); + } + +inf_check: + // If we have no max bound, we need a self-loop as well. + if (m_max == NoLimit) { + DEBUG_PRINTF("final repeat self-loop\n"); + bs.connectRegions(m_lasts.back(), m_firsts.back()); + } +} + +static +bool hasPositionFlags(const Component &c) { + for (const auto &e : c.first()) { + if (e.flags) { + return true; + } + } + return false; +} + +void ComponentRepeat::postSubNotePositionHook() { + // UE-444 optimization: we can REWRITE m_min under various circumstances, + // so that we create smaller NFA graphs. Note that this is _not_ possible + // if our subcomponent contains a flagged position, e.g. nofloat. + if (!hasPositionFlags(*sub_comp) && sub_comp->empty()) { + m_min = 0; + } +} + +unique_ptr<ComponentRepeat> makeComponentRepeat(unique_ptr<Component> sub_comp, + u32 min, u32 max, + ComponentRepeat::RepeatType t) { + return ue2::make_unique<ComponentRepeat>(move(sub_comp), min, max, t); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h index 824a986386..8905bfcf5e 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h +++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h @@ -1,65 +1,65 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Repeats ('*', '+', '?', '{M,N}', etc) - */ - -#ifndef RE_COMPONENTREPEAT_H -#define RE_COMPONENTREPEAT_H - -#include "Component.h" -#include "position.h" -#include "ue2common.h" - -#include <memory> -#include <utility> - -namespace ue2 { - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Repeats ('*', '+', '?', '{M,N}', etc) + */ + +#ifndef RE_COMPONENTREPEAT_H +#define RE_COMPONENTREPEAT_H + +#include "Component.h" +#include "position.h" +#include "ue2common.h" + +#include <memory> +#include <utility> + +namespace ue2 { + /** * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', - * etc). - * + * etc). + * * ASCII Art Time: - * - * Our standard representation of standard repeats. Other constructions (fan-in - * vs fan-out) would also be possible and equivalent for our purposes. - * - * {n,m} - * + * + * Our standard representation of standard repeats. Other constructions (fan-in + * vs fan-out) would also be possible and equivalent for our purposes. + * + * {n,m} + * * S->M->M->M->O->O->O->T * | ^ ^ ^ * | | | | * \-----------/ - * - * {0,m} - * + * + * {0,m} + * * /-----------\ * | | * | V @@ -67,78 +67,78 @@ namespace ue2 { * | ^ ^ ^ * | | | | * \--------/ - * - */ -class ComponentRepeat : public Component { - friend class ConstructLiteralVisitor; - friend class DumpVisitor; - friend class PrintVisitor; - friend class SimplifyVisitor; -public: - /** \brief Value representing no maximum bound. */ - static constexpr u32 NoLimit = 0xffffffff; - - /** \brief Type of this repeat, characterising its - * greediness/possessiveness. */ - enum RepeatType { - /** Minimising repeat, like 'a*?'. */ - REPEAT_NONGREEDY, - /** Maximising repeat, like 'a*'. This is the default in PCRE. */ - REPEAT_GREEDY, - /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are - * only currently supported in prefiltering mode, where we treat them - * the same way we treat normal greedy repeats. */ - REPEAT_POSSESSIVE, - }; - - ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, - RepeatType t); - ~ComponentRepeat() override; - ComponentRepeat *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty() const override; - bool repeatable() const override; - bool vacuous_everywhere() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool checkEmbeddedStartAnchor(bool at_start) const override; - bool checkEmbeddedEndAnchor(bool at_end) const override; - - void optimise(bool connected_to_sds) override; - - virtual std::pair<u32, u32> getBounds() const { - return std::make_pair(m_min, m_max); - } - - /** \brief From declared behaviour (not taking into account the - * sub-component). */ - enum RepeatType type; - -protected: - void postSubNotePositionHook(); + * + */ +class ComponentRepeat : public Component { + friend class ConstructLiteralVisitor; + friend class DumpVisitor; + friend class PrintVisitor; + friend class SimplifyVisitor; +public: + /** \brief Value representing no maximum bound. */ + static constexpr u32 NoLimit = 0xffffffff; + + /** \brief Type of this repeat, characterising its + * greediness/possessiveness. */ + enum RepeatType { + /** Minimising repeat, like 'a*?'. */ + REPEAT_NONGREEDY, + /** Maximising repeat, like 'a*'. This is the default in PCRE. */ + REPEAT_GREEDY, + /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are + * only currently supported in prefiltering mode, where we treat them + * the same way we treat normal greedy repeats. */ + REPEAT_POSSESSIVE, + }; + + ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, + RepeatType t); + ~ComponentRepeat() override; + ComponentRepeat *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty() const override; + bool repeatable() const override; + bool vacuous_everywhere() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool checkEmbeddedStartAnchor(bool at_start) const override; + bool checkEmbeddedEndAnchor(bool at_end) const override; + + void optimise(bool connected_to_sds) override; + + virtual std::pair<u32, u32> getBounds() const { + return std::make_pair(m_min, m_max); + } + + /** \brief From declared behaviour (not taking into account the + * sub-component). */ + enum RepeatType type; + +protected: + void postSubNotePositionHook(); void wireRepeats(GlushkovBuildState &bs); - - std::unique_ptr<Component> sub_comp; - u32 m_min; - u32 m_max; - - std::vector<std::vector<PositionInfo> > m_firsts; - std::vector<std::vector<PositionInfo> > m_lasts; - Position posFirst; - Position posLast; - - ComponentRepeat(const ComponentRepeat &other); -}; - -std::unique_ptr<ComponentRepeat> -makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, - ComponentRepeat::RepeatType t); - -} // namespace ue2 - -#endif // _RE_COMPONENTREPEAT_H_ + + std::unique_ptr<Component> sub_comp; + u32 m_min; + u32 m_max; + + std::vector<std::vector<PositionInfo> > m_firsts; + std::vector<std::vector<PositionInfo> > m_lasts; + Position posFirst; + Position posLast; + + ComponentRepeat(const ComponentRepeat &other); +}; + +std::unique_ptr<ComponentRepeat> +makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, + ComponentRepeat::RepeatType t); + +} // namespace ue2 + +#endif // _RE_COMPONENTREPEAT_H_ diff --git a/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp b/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp index fd198b222e..b0b5b13935 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentSequence.cpp @@ -1,376 +1,376 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Sequence of Component objects. - */ - - -#include "ComponentSequence.h" - -#include "buildstate.h" -#include "ComponentAlternation.h" -#include "ComponentRepeat.h" -#include "Parser.h" -#include "ue2common.h" -#include "parse_error.h" -#include "position_dump.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "util/container.h" -#include "util/make_unique.h" - -#include <algorithm> -#include <cassert> - -using namespace std; - -namespace ue2 { - -ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {} - -ComponentSequence::~ComponentSequence() {} - -ComponentSequence::ComponentSequence(const ComponentSequence &other) - : Component(other), capture_index(other.capture_index) { - // Deep copy children. - for (const auto &c : other.children) { - assert(c); - children.push_back(unique_ptr<Component>(c->clone())); - } - if (other.alternation) { - const ComponentAlternation &c = *other.alternation; - alternation.reset(c.clone()); - } -} - -ComponentSequence *ComponentSequence::clone() const { - return new ComponentSequence(*this); -} - -Component *ComponentSequence::accept(ComponentVisitor &v) { - assert(!alternation); // Sequence must be finalized first. - - Component *c = v.visit(this); - if (c != this) { - v.post(this); - return c; - } - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - Component *child = i->get(); - c = (*i)->accept(v); - if (c != child) { - // Child has been replaced (new Component pointer) or we've been - // instructed to delete it (null). - i->reset(c); - } - } - - // Remove deleted children. - children.erase(remove(children.begin(), children.end(), nullptr), - children.end()); - - v.post(this); - return this; -} - -void ComponentSequence::accept(ConstComponentVisitor &v) const { - assert(!alternation); // Sequence must be finalized first. - - v.pre(*this); - - for (auto i = children.begin(), e = children.end(); i != e; ++i) { - (*i)->accept(v); - - if (i + 1 != e) { - v.during(*this); - } - } - - v.post(*this); -} - -void ComponentSequence::addComponent(unique_ptr<Component> comp) { - children.push_back(move(comp)); -} - -bool ComponentSequence::addRepeat(u32 min, u32 max, - ComponentRepeat::RepeatType type) { - if (children.empty() || min > max || max == 0) { - return false; - } - - // We can't apply a repeat to some types of component. - assert(children.back()); - if (!children.back()->repeatable()) { - return false; - } - - children.back() = makeComponentRepeat(move(children.back()), min, max, - type); - assert(children.back()); - return true; -} - -void ComponentSequence::addAlternation() { - if (!alternation) { - alternation = ue2::make_unique<ComponentAlternation>(); - } - - auto seq = ue2::make_unique<ComponentSequence>(); - seq->children.swap(children); - alternation->append(move(seq)); -} - -void ComponentSequence::finalize() { - if (alternation) { - addAlternation(); - assert(children.empty()); - children.push_back(move(alternation)); - alternation = nullptr; - } -} - -vector<PositionInfo> ComponentSequence::first() const { - vector<PositionInfo> firsts, subfirsts; - - for (const auto &c : children) { - subfirsts = c->first(); - replaceEpsilons(firsts, subfirsts); - if (!c->empty()) { - break; - } - } - - if (firsts.empty()) { - DEBUG_PRINTF("trivial empty sequence %zu\n", firsts.size()); - assert(children.empty()); - firsts.push_back(GlushkovBuildState::POS_EPSILON); - } - - DEBUG_PRINTF("%zu firsts\n", firsts.size()); - return firsts; -} - -namespace { -struct eps_info { - eps_info() : flags(0U) {} - u32 flags; -}; -} - -static -void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) { - vector<eps_info> out; - out.reserve(info->size()); - - set<u32> seen_flags; - - assert(!info->empty()); - for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON); - eps != f.end(); - eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) { - for (auto it = info->begin(); it != info->end(); ++it) { - u32 flags = it->flags | eps->flags; - if (contains(seen_flags, flags)) { - continue; - } - - out.push_back(*it); - out.back().flags = flags; - seen_flags.insert(flags); - } - } - - info->swap(out); - assert(!info->empty()); -} - -static -void applyEpsilonVisits(vector<PositionInfo> &lasts, - const vector<eps_info> &eps_visits) { - vector<PositionInfo> out; - out.reserve(lasts.size() * eps_visits.size()); - - for (const auto &last : lasts) { - for (const auto &e : eps_visits) { - out.push_back(last); - out.back().flags |= e.flags; - } - } - - cleanupPositions(out); - lasts.swap(out); -} - -vector<PositionInfo> ComponentSequence::last() const { - vector<PositionInfo> lasts, sublasts; - vector<eps_info> visits(1); - - auto i = children.rbegin(), e = children.rend(); - for (; i != e; ++i) { - sublasts = (*i)->last(); - applyEpsilonVisits(sublasts, visits); - lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); - if ((*i)->empty()) { - // this epsilon's flags should propagate to subsequent lasts' - // enter/exit lists - epsilonVisit(&visits, (*i)->first()); - } else { - break; - } - } - - DEBUG_PRINTF("lasts = %s\n", - dumpPositions(lasts.begin(), lasts.end()).c_str()); - return lasts; -} - -bool ComponentSequence::empty(void) const { - // a sequence can be empty if all its subcomponents can be empty - for (const auto &c : children) { - if (!c->empty()) { - return false; - } - } - return true; -} - -void ComponentSequence::notePositions(GlushkovBuildState &bs) { - u32 pb = bs.getBuilder().numVertices(); - for (auto &c : children) { - c->notePositions(bs); - } - recordPosBounds(pb, bs.getBuilder().numVertices()); -} - -void ComponentSequence::buildFollowSet(GlushkovBuildState &bs, - const vector<PositionInfo> &lastPos) { - DEBUG_PRINTF("sequence of %zu components\n", children.size()); - - // If no components, no work to do. - if (children.empty()) { - return; - } - - // First element - children.front()->buildFollowSet(bs, lastPos); - if (children.size() == 1) { - // If our sequence contains precisely one component, then we've done - // all our work. Hooking up its firsts and lasts will be done by our - // parent component. - return; - } - - // Remaining elements, wiring last to first in sequence. - - vector<PositionInfo> prevLasts = children.front()->last(); - - for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) { - assert(*it); - Component &c = *(*it); - - // Build subcomponent follow set - c.buildFollowSet(bs, prevLasts); - - // FIRST(curr) - vector<PositionInfo> currFirsts(c.first()); - - // LAST(prev) => FIRST(curr) - DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n", - prevLasts.size(), it - children.begin()); - bs.connectRegions(prevLasts, currFirsts); - - // Generate a new LAST(prev) for the next iteration; either c->last() - // on its own if it can't be empty or c->last unioned with the previous - // last if c can be empty - vector<PositionInfo> currLasts(c.last()); - - if (!c.empty()) { - // Current component can't be empty, so use its lasts only - prevLasts.swap(currLasts); - DEBUG_PRINTF("swapped lasts\n"); - } else { - // Add current lasts to previous lasts - DEBUG_PRINTF("doing stuff for empty comp\n"); - prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end()); - DEBUG_PRINTF("done stuff for empty comp\n"); - } - } -} - -bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const { - for (const auto &c : children) { - at_start = c->checkEmbeddedStartAnchor(at_start); - } - - return at_start; -} - -bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const { - // Note reversed ordering. - for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) { - at_end = (*i)->checkEmbeddedEndAnchor(at_end); - } - - return at_end; -} - -bool ComponentSequence::vacuous_everywhere() const { - for (const auto &c : children) { - if (!c->vacuous_everywhere()) { - return false; - } - } - return true; -} - -void ComponentSequence::optimise(bool connected_to_sds) { - DEBUG_PRINTF("opt %d\n", (int)connected_to_sds); - for (u32 i = 0; i < children.size();) { - DEBUG_PRINTF("opt %u: ctsds: %d\n", i, (int)connected_to_sds); - Component &sub = *children[i]; - - sub.optimise(connected_to_sds); - - bool vacuous = sub.vacuous_everywhere(); - - if (connected_to_sds && vacuous) { - DEBUG_PRINTF("delete opt %u\n", i); - auto it = children.begin() + i; - children.erase(it); - continue; - } - - connected_to_sds = connected_to_sds && vacuous; - i++; - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Sequence of Component objects. + */ + + +#include "ComponentSequence.h" + +#include "buildstate.h" +#include "ComponentAlternation.h" +#include "ComponentRepeat.h" +#include "Parser.h" +#include "ue2common.h" +#include "parse_error.h" +#include "position_dump.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "util/container.h" +#include "util/make_unique.h" + +#include <algorithm> +#include <cassert> + +using namespace std; + +namespace ue2 { + +ComponentSequence::ComponentSequence() : capture_index(NOT_CAPTURED) {} + +ComponentSequence::~ComponentSequence() {} + +ComponentSequence::ComponentSequence(const ComponentSequence &other) + : Component(other), capture_index(other.capture_index) { + // Deep copy children. + for (const auto &c : other.children) { + assert(c); + children.push_back(unique_ptr<Component>(c->clone())); + } + if (other.alternation) { + const ComponentAlternation &c = *other.alternation; + alternation.reset(c.clone()); + } +} + +ComponentSequence *ComponentSequence::clone() const { + return new ComponentSequence(*this); +} + +Component *ComponentSequence::accept(ComponentVisitor &v) { + assert(!alternation); // Sequence must be finalized first. + + Component *c = v.visit(this); + if (c != this) { + v.post(this); + return c; + } + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + Component *child = i->get(); + c = (*i)->accept(v); + if (c != child) { + // Child has been replaced (new Component pointer) or we've been + // instructed to delete it (null). + i->reset(c); + } + } + + // Remove deleted children. + children.erase(remove(children.begin(), children.end(), nullptr), + children.end()); + + v.post(this); + return this; +} + +void ComponentSequence::accept(ConstComponentVisitor &v) const { + assert(!alternation); // Sequence must be finalized first. + + v.pre(*this); + + for (auto i = children.begin(), e = children.end(); i != e; ++i) { + (*i)->accept(v); + + if (i + 1 != e) { + v.during(*this); + } + } + + v.post(*this); +} + +void ComponentSequence::addComponent(unique_ptr<Component> comp) { + children.push_back(move(comp)); +} + +bool ComponentSequence::addRepeat(u32 min, u32 max, + ComponentRepeat::RepeatType type) { + if (children.empty() || min > max || max == 0) { + return false; + } + + // We can't apply a repeat to some types of component. + assert(children.back()); + if (!children.back()->repeatable()) { + return false; + } + + children.back() = makeComponentRepeat(move(children.back()), min, max, + type); + assert(children.back()); + return true; +} + +void ComponentSequence::addAlternation() { + if (!alternation) { + alternation = ue2::make_unique<ComponentAlternation>(); + } + + auto seq = ue2::make_unique<ComponentSequence>(); + seq->children.swap(children); + alternation->append(move(seq)); +} + +void ComponentSequence::finalize() { + if (alternation) { + addAlternation(); + assert(children.empty()); + children.push_back(move(alternation)); + alternation = nullptr; + } +} + +vector<PositionInfo> ComponentSequence::first() const { + vector<PositionInfo> firsts, subfirsts; + + for (const auto &c : children) { + subfirsts = c->first(); + replaceEpsilons(firsts, subfirsts); + if (!c->empty()) { + break; + } + } + + if (firsts.empty()) { + DEBUG_PRINTF("trivial empty sequence %zu\n", firsts.size()); + assert(children.empty()); + firsts.push_back(GlushkovBuildState::POS_EPSILON); + } + + DEBUG_PRINTF("%zu firsts\n", firsts.size()); + return firsts; +} + +namespace { +struct eps_info { + eps_info() : flags(0U) {} + u32 flags; +}; +} + +static +void epsilonVisit(vector<eps_info> *info, const vector<PositionInfo> &f) { + vector<eps_info> out; + out.reserve(info->size()); + + set<u32> seen_flags; + + assert(!info->empty()); + for (auto eps = find(f.begin(), f.end(), GlushkovBuildState::POS_EPSILON); + eps != f.end(); + eps = find(eps + 1, f.end(), GlushkovBuildState::POS_EPSILON)) { + for (auto it = info->begin(); it != info->end(); ++it) { + u32 flags = it->flags | eps->flags; + if (contains(seen_flags, flags)) { + continue; + } + + out.push_back(*it); + out.back().flags = flags; + seen_flags.insert(flags); + } + } + + info->swap(out); + assert(!info->empty()); +} + +static +void applyEpsilonVisits(vector<PositionInfo> &lasts, + const vector<eps_info> &eps_visits) { + vector<PositionInfo> out; + out.reserve(lasts.size() * eps_visits.size()); + + for (const auto &last : lasts) { + for (const auto &e : eps_visits) { + out.push_back(last); + out.back().flags |= e.flags; + } + } + + cleanupPositions(out); + lasts.swap(out); +} + +vector<PositionInfo> ComponentSequence::last() const { + vector<PositionInfo> lasts, sublasts; + vector<eps_info> visits(1); + + auto i = children.rbegin(), e = children.rend(); + for (; i != e; ++i) { + sublasts = (*i)->last(); + applyEpsilonVisits(sublasts, visits); + lasts.insert(lasts.end(), sublasts.begin(), sublasts.end()); + if ((*i)->empty()) { + // this epsilon's flags should propagate to subsequent lasts' + // enter/exit lists + epsilonVisit(&visits, (*i)->first()); + } else { + break; + } + } + + DEBUG_PRINTF("lasts = %s\n", + dumpPositions(lasts.begin(), lasts.end()).c_str()); + return lasts; +} + +bool ComponentSequence::empty(void) const { + // a sequence can be empty if all its subcomponents can be empty + for (const auto &c : children) { + if (!c->empty()) { + return false; + } + } + return true; +} + +void ComponentSequence::notePositions(GlushkovBuildState &bs) { + u32 pb = bs.getBuilder().numVertices(); + for (auto &c : children) { + c->notePositions(bs); + } + recordPosBounds(pb, bs.getBuilder().numVertices()); +} + +void ComponentSequence::buildFollowSet(GlushkovBuildState &bs, + const vector<PositionInfo> &lastPos) { + DEBUG_PRINTF("sequence of %zu components\n", children.size()); + + // If no components, no work to do. + if (children.empty()) { + return; + } + + // First element + children.front()->buildFollowSet(bs, lastPos); + if (children.size() == 1) { + // If our sequence contains precisely one component, then we've done + // all our work. Hooking up its firsts and lasts will be done by our + // parent component. + return; + } + + // Remaining elements, wiring last to first in sequence. + + vector<PositionInfo> prevLasts = children.front()->last(); + + for (auto it = next(children.begin()), ite = children.end(); it != ite; ++it) { + assert(*it); + Component &c = *(*it); + + // Build subcomponent follow set + c.buildFollowSet(bs, prevLasts); + + // FIRST(curr) + vector<PositionInfo> currFirsts(c.first()); + + // LAST(prev) => FIRST(curr) + DEBUG_PRINTF("connecting lasts (|| %zu) to firsts of comp %zd\n", + prevLasts.size(), it - children.begin()); + bs.connectRegions(prevLasts, currFirsts); + + // Generate a new LAST(prev) for the next iteration; either c->last() + // on its own if it can't be empty or c->last unioned with the previous + // last if c can be empty + vector<PositionInfo> currLasts(c.last()); + + if (!c.empty()) { + // Current component can't be empty, so use its lasts only + prevLasts.swap(currLasts); + DEBUG_PRINTF("swapped lasts\n"); + } else { + // Add current lasts to previous lasts + DEBUG_PRINTF("doing stuff for empty comp\n"); + prevLasts.insert(prevLasts.end(), currLasts.begin(), currLasts.end()); + DEBUG_PRINTF("done stuff for empty comp\n"); + } + } +} + +bool ComponentSequence::checkEmbeddedStartAnchor(bool at_start) const { + for (const auto &c : children) { + at_start = c->checkEmbeddedStartAnchor(at_start); + } + + return at_start; +} + +bool ComponentSequence::checkEmbeddedEndAnchor(bool at_end) const { + // Note reversed ordering. + for (auto i = children.rbegin(), e = children.rend(); i != e; ++i) { + at_end = (*i)->checkEmbeddedEndAnchor(at_end); + } + + return at_end; +} + +bool ComponentSequence::vacuous_everywhere() const { + for (const auto &c : children) { + if (!c->vacuous_everywhere()) { + return false; + } + } + return true; +} + +void ComponentSequence::optimise(bool connected_to_sds) { + DEBUG_PRINTF("opt %d\n", (int)connected_to_sds); + for (u32 i = 0; i < children.size();) { + DEBUG_PRINTF("opt %u: ctsds: %d\n", i, (int)connected_to_sds); + Component &sub = *children[i]; + + sub.optimise(connected_to_sds); + + bool vacuous = sub.vacuous_everywhere(); + + if (connected_to_sds && vacuous) { + DEBUG_PRINTF("delete opt %u\n", i); + auto it = children.begin() + i; + children.erase(it); + continue; + } + + connected_to_sds = connected_to_sds && vacuous; + i++; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentSequence.h b/contrib/libs/hyperscan/src/parser/ComponentSequence.h index 12c35f607a..08e57d0a3d 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentSequence.h +++ b/contrib/libs/hyperscan/src/parser/ComponentSequence.h @@ -1,108 +1,108 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Sequence of Component objects. - */ - -#ifndef COMPONENT_SEQUENCE_H -#define COMPONENT_SEQUENCE_H - -#include "Component.h" -#include "ComponentRepeat.h" // for ComponentRepeat::RepeatType -#include "ue2common.h" - -#include <memory> -#include <set> -#include <vector> - -namespace ue2 { - -class ComponentAlternation; -class GlushkovBuildState; - -// Encapsulates a number of sub expressions to be applied sequentially -class ComponentSequence : public Component { - friend class DumpVisitor; - friend class PrintVisitor; - friend class SimplifyVisitor; -public: - /** \brief capture index representing a sequence that ISN'T capturing */ - static constexpr unsigned int NOT_CAPTURED = 65536; - - ComponentSequence(); - ~ComponentSequence() override; - ComponentSequence *clone() const override; - Component *accept(ComponentVisitor &v) override; - void accept(ConstComponentVisitor &v) const override; - - bool addRepeat(u32 min, u32 max, ComponentRepeat::RepeatType type); - - // overridden by ComponentCondReference, which can only have 1 or 2 - // branches. - virtual void addAlternation(); - - virtual void finalize(); - - void addComponent(std::unique_ptr<Component> comp); - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty(void) const override; - bool vacuous_everywhere() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - bool checkEmbeddedStartAnchor(bool at_start) const override; - bool checkEmbeddedEndAnchor(bool at_end) const override; - - void optimise(bool connected_to_sds) override; - - void setCaptureIndex(unsigned int idx) { capture_index = idx; } - unsigned int getCaptureIndex() const { return capture_index; } - void setCaptureName(const std::string &s) { capture_name = s; } - const std::string &getCaptureName() const { return capture_name; } - - virtual const std::vector<std::unique_ptr<Component>> &getChildren() const { - return children; - } - -protected: - ComponentSequence(const ComponentSequence &other); - - std::vector<std::unique_ptr<Component>> children; - std::unique_ptr<ComponentAlternation> alternation; - -private: - unsigned int capture_index; - std::string capture_name; //!< empty means no name -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Sequence of Component objects. + */ + +#ifndef COMPONENT_SEQUENCE_H +#define COMPONENT_SEQUENCE_H + +#include "Component.h" +#include "ComponentRepeat.h" // for ComponentRepeat::RepeatType +#include "ue2common.h" + +#include <memory> +#include <set> +#include <vector> + +namespace ue2 { + +class ComponentAlternation; +class GlushkovBuildState; + +// Encapsulates a number of sub expressions to be applied sequentially +class ComponentSequence : public Component { + friend class DumpVisitor; + friend class PrintVisitor; + friend class SimplifyVisitor; +public: + /** \brief capture index representing a sequence that ISN'T capturing */ + static constexpr unsigned int NOT_CAPTURED = 65536; + + ComponentSequence(); + ~ComponentSequence() override; + ComponentSequence *clone() const override; + Component *accept(ComponentVisitor &v) override; + void accept(ConstComponentVisitor &v) const override; + + bool addRepeat(u32 min, u32 max, ComponentRepeat::RepeatType type); + + // overridden by ComponentCondReference, which can only have 1 or 2 + // branches. + virtual void addAlternation(); + + virtual void finalize(); + + void addComponent(std::unique_ptr<Component> comp); + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty(void) const override; + bool vacuous_everywhere() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + bool checkEmbeddedStartAnchor(bool at_start) const override; + bool checkEmbeddedEndAnchor(bool at_end) const override; + + void optimise(bool connected_to_sds) override; + + void setCaptureIndex(unsigned int idx) { capture_index = idx; } + unsigned int getCaptureIndex() const { return capture_index; } + void setCaptureName(const std::string &s) { capture_name = s; } + const std::string &getCaptureName() const { return capture_name; } + + virtual const std::vector<std::unique_ptr<Component>> &getChildren() const { + return children; + } + +protected: + ComponentSequence(const ComponentSequence &other); + + std::vector<std::unique_ptr<Component>> children; + std::unique_ptr<ComponentAlternation> alternation; + +private: + unsigned int capture_index; + std::string capture_name; //!< empty means no name +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp b/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp index d1a82af22a..b7bbba23ee 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentVisitor.cpp @@ -1,76 +1,76 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "AsciiComponentClass.h" -#include "ComponentVisitor.h" -#include "ComponentAlternation.h" -#include "ComponentAssertion.h" -#include "ComponentAtomicGroup.h" -#include "ComponentBackReference.h" -#include "ComponentBoundary.h" -#include "ComponentByte.h" -#include "ComponentCondReference.h" -#include "ComponentClass.h" -#include "ComponentEmpty.h" -#include "ComponentEUS.h" -#include "ComponentRepeat.h" -#include "ComponentSequence.h" -#include "ComponentWordBoundary.h" -#include "Utf8ComponentClass.h" - -namespace ue2 { - -ComponentVisitor::~ComponentVisitor() { - // empty -} - -// Default implementations. - -DefaultComponentVisitor::DefaultComponentVisitor() {} -DefaultComponentVisitor::~DefaultComponentVisitor() {} - -#define DEFAULT_FUNCS(comp) \ - Component *DefaultComponentVisitor::visit(comp *c) { return c; } \ - void DefaultComponentVisitor::post(comp *) {} - -DEFAULT_FUNCS(AsciiComponentClass) -DEFAULT_FUNCS(ComponentAlternation) -DEFAULT_FUNCS(ComponentAssertion) -DEFAULT_FUNCS(ComponentAtomicGroup) -DEFAULT_FUNCS(ComponentBackReference) -DEFAULT_FUNCS(ComponentBoundary) -DEFAULT_FUNCS(ComponentByte) -DEFAULT_FUNCS(ComponentCondReference) -DEFAULT_FUNCS(ComponentEmpty) -DEFAULT_FUNCS(ComponentEUS) -DEFAULT_FUNCS(ComponentRepeat) -DEFAULT_FUNCS(ComponentSequence) -DEFAULT_FUNCS(ComponentWordBoundary) -DEFAULT_FUNCS(UTF8ComponentClass) - -} // namespace +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "AsciiComponentClass.h" +#include "ComponentVisitor.h" +#include "ComponentAlternation.h" +#include "ComponentAssertion.h" +#include "ComponentAtomicGroup.h" +#include "ComponentBackReference.h" +#include "ComponentBoundary.h" +#include "ComponentByte.h" +#include "ComponentCondReference.h" +#include "ComponentClass.h" +#include "ComponentEmpty.h" +#include "ComponentEUS.h" +#include "ComponentRepeat.h" +#include "ComponentSequence.h" +#include "ComponentWordBoundary.h" +#include "Utf8ComponentClass.h" + +namespace ue2 { + +ComponentVisitor::~ComponentVisitor() { + // empty +} + +// Default implementations. + +DefaultComponentVisitor::DefaultComponentVisitor() {} +DefaultComponentVisitor::~DefaultComponentVisitor() {} + +#define DEFAULT_FUNCS(comp) \ + Component *DefaultComponentVisitor::visit(comp *c) { return c; } \ + void DefaultComponentVisitor::post(comp *) {} + +DEFAULT_FUNCS(AsciiComponentClass) +DEFAULT_FUNCS(ComponentAlternation) +DEFAULT_FUNCS(ComponentAssertion) +DEFAULT_FUNCS(ComponentAtomicGroup) +DEFAULT_FUNCS(ComponentBackReference) +DEFAULT_FUNCS(ComponentBoundary) +DEFAULT_FUNCS(ComponentByte) +DEFAULT_FUNCS(ComponentCondReference) +DEFAULT_FUNCS(ComponentEmpty) +DEFAULT_FUNCS(ComponentEUS) +DEFAULT_FUNCS(ComponentRepeat) +DEFAULT_FUNCS(ComponentSequence) +DEFAULT_FUNCS(ComponentWordBoundary) +DEFAULT_FUNCS(UTF8ComponentClass) + +} // namespace diff --git a/contrib/libs/hyperscan/src/parser/ComponentVisitor.h b/contrib/libs/hyperscan/src/parser/ComponentVisitor.h index e906c7fc4d..be28d33610 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentVisitor.h +++ b/contrib/libs/hyperscan/src/parser/ComponentVisitor.h @@ -1,150 +1,150 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Visitor base class for working with the component tree. - */ - -#ifndef COMPONENTVISITOR_H -#define COMPONENTVISITOR_H - -namespace ue2 { - -class AsciiComponentClass; -class Component; -class ComponentAlternation; -class ComponentAssertion; -class ComponentAtomicGroup; -class ComponentBackReference; -class ComponentBoundary; -class ComponentByte; -class ComponentClass; -class ComponentCondReference; -class ComponentEmpty; -class ComponentEUS; -class ComponentRepeat; -class ComponentSequence; -class ComponentWordBoundary; -class UTF8ComponentClass; - -/** - * \brief Visitor base class for working with the component tree. - * - * Our approach to implementing the visitor pattern for traversing (and - * optionally mutating) the Component tree for a pattern. Each _visit_ function - * takes a Component subclass pointer in and returns a Component pointer. That - * pointer can have several values, dictating what the containing Component - * should do: - * - * 1. If ptr == c, then do nothing. - * 2. If ptr == nullptr, then remove c from the tree. - * 3. If ptr != c && ptr != nullptr, then replace c with ptr. - * - * Traversal order is pre-order. - * - * After a Component's subcomponents have been visited, the _post_ function for - * that Component will be called. - */ -class ComponentVisitor { -public: - virtual ~ComponentVisitor(); - - virtual Component *visit(AsciiComponentClass *c) = 0; - virtual Component *visit(ComponentAlternation *c) = 0; - virtual Component *visit(ComponentAssertion *c) = 0; - virtual Component *visit(ComponentAtomicGroup *c) = 0; - virtual Component *visit(ComponentBackReference *c) = 0; - virtual Component *visit(ComponentBoundary *c) = 0; - virtual Component *visit(ComponentByte *c) = 0; - virtual Component *visit(ComponentCondReference *c) = 0; - virtual Component *visit(ComponentEmpty *c) = 0; - virtual Component *visit(ComponentEUS *c) = 0; - virtual Component *visit(ComponentRepeat *c) = 0; - virtual Component *visit(ComponentSequence *c) = 0; - virtual Component *visit(ComponentWordBoundary *c) = 0; - virtual Component *visit(UTF8ComponentClass *c) = 0; - - virtual void post(AsciiComponentClass *c) = 0; - virtual void post(ComponentAlternation *c) = 0; - virtual void post(ComponentAssertion *c) = 0; - virtual void post(ComponentAtomicGroup *c) = 0; - virtual void post(ComponentBackReference *c) = 0; - virtual void post(ComponentBoundary *c) = 0; - virtual void post(ComponentByte *c) = 0; - virtual void post(ComponentCondReference *c) = 0; - virtual void post(ComponentEmpty *c) = 0; - virtual void post(ComponentEUS *c) = 0; - virtual void post(ComponentRepeat *c) = 0; - virtual void post(ComponentSequence *c) = 0; - virtual void post(ComponentWordBoundary *c) = 0; - virtual void post(UTF8ComponentClass *c) = 0; -}; - -/** - * \brief Concrete subclass of ComponentVisitor with default behaviour, - * allowing you to just implement the member functions you need. - */ -class DefaultComponentVisitor : public ComponentVisitor { -public: - DefaultComponentVisitor(); - ~DefaultComponentVisitor() override; - - Component *visit(AsciiComponentClass *c) override; - Component *visit(ComponentAlternation *c) override; - Component *visit(ComponentAssertion *c) override; - Component *visit(ComponentAtomicGroup *c) override; - Component *visit(ComponentBackReference *c) override; - Component *visit(ComponentBoundary *c) override; - Component *visit(ComponentByte *c) override; - Component *visit(ComponentCondReference *c) override; - Component *visit(ComponentEmpty *c) override; - Component *visit(ComponentEUS *c) override; - Component *visit(ComponentRepeat *c) override; - Component *visit(ComponentSequence *c) override; - Component *visit(ComponentWordBoundary *c) override; - Component *visit(UTF8ComponentClass *c) override; - - void post(AsciiComponentClass *c) override; - void post(ComponentAlternation *c) override; - void post(ComponentAssertion *c) override; - void post(ComponentAtomicGroup *c) override; - void post(ComponentBackReference *c) override; - void post(ComponentBoundary *c) override; - void post(ComponentByte *c) override; - void post(ComponentCondReference *c) override; - void post(ComponentEmpty *c) override; - void post(ComponentEUS *c) override; - void post(ComponentRepeat *c) override; - void post(ComponentSequence *c) override; - void post(ComponentWordBoundary *c) override; - void post(UTF8ComponentClass *c) override; -}; - -} // namespace ue2 - -#endif // COMPONENTVISITOR_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Visitor base class for working with the component tree. + */ + +#ifndef COMPONENTVISITOR_H +#define COMPONENTVISITOR_H + +namespace ue2 { + +class AsciiComponentClass; +class Component; +class ComponentAlternation; +class ComponentAssertion; +class ComponentAtomicGroup; +class ComponentBackReference; +class ComponentBoundary; +class ComponentByte; +class ComponentClass; +class ComponentCondReference; +class ComponentEmpty; +class ComponentEUS; +class ComponentRepeat; +class ComponentSequence; +class ComponentWordBoundary; +class UTF8ComponentClass; + +/** + * \brief Visitor base class for working with the component tree. + * + * Our approach to implementing the visitor pattern for traversing (and + * optionally mutating) the Component tree for a pattern. Each _visit_ function + * takes a Component subclass pointer in and returns a Component pointer. That + * pointer can have several values, dictating what the containing Component + * should do: + * + * 1. If ptr == c, then do nothing. + * 2. If ptr == nullptr, then remove c from the tree. + * 3. If ptr != c && ptr != nullptr, then replace c with ptr. + * + * Traversal order is pre-order. + * + * After a Component's subcomponents have been visited, the _post_ function for + * that Component will be called. + */ +class ComponentVisitor { +public: + virtual ~ComponentVisitor(); + + virtual Component *visit(AsciiComponentClass *c) = 0; + virtual Component *visit(ComponentAlternation *c) = 0; + virtual Component *visit(ComponentAssertion *c) = 0; + virtual Component *visit(ComponentAtomicGroup *c) = 0; + virtual Component *visit(ComponentBackReference *c) = 0; + virtual Component *visit(ComponentBoundary *c) = 0; + virtual Component *visit(ComponentByte *c) = 0; + virtual Component *visit(ComponentCondReference *c) = 0; + virtual Component *visit(ComponentEmpty *c) = 0; + virtual Component *visit(ComponentEUS *c) = 0; + virtual Component *visit(ComponentRepeat *c) = 0; + virtual Component *visit(ComponentSequence *c) = 0; + virtual Component *visit(ComponentWordBoundary *c) = 0; + virtual Component *visit(UTF8ComponentClass *c) = 0; + + virtual void post(AsciiComponentClass *c) = 0; + virtual void post(ComponentAlternation *c) = 0; + virtual void post(ComponentAssertion *c) = 0; + virtual void post(ComponentAtomicGroup *c) = 0; + virtual void post(ComponentBackReference *c) = 0; + virtual void post(ComponentBoundary *c) = 0; + virtual void post(ComponentByte *c) = 0; + virtual void post(ComponentCondReference *c) = 0; + virtual void post(ComponentEmpty *c) = 0; + virtual void post(ComponentEUS *c) = 0; + virtual void post(ComponentRepeat *c) = 0; + virtual void post(ComponentSequence *c) = 0; + virtual void post(ComponentWordBoundary *c) = 0; + virtual void post(UTF8ComponentClass *c) = 0; +}; + +/** + * \brief Concrete subclass of ComponentVisitor with default behaviour, + * allowing you to just implement the member functions you need. + */ +class DefaultComponentVisitor : public ComponentVisitor { +public: + DefaultComponentVisitor(); + ~DefaultComponentVisitor() override; + + Component *visit(AsciiComponentClass *c) override; + Component *visit(ComponentAlternation *c) override; + Component *visit(ComponentAssertion *c) override; + Component *visit(ComponentAtomicGroup *c) override; + Component *visit(ComponentBackReference *c) override; + Component *visit(ComponentBoundary *c) override; + Component *visit(ComponentByte *c) override; + Component *visit(ComponentCondReference *c) override; + Component *visit(ComponentEmpty *c) override; + Component *visit(ComponentEUS *c) override; + Component *visit(ComponentRepeat *c) override; + Component *visit(ComponentSequence *c) override; + Component *visit(ComponentWordBoundary *c) override; + Component *visit(UTF8ComponentClass *c) override; + + void post(AsciiComponentClass *c) override; + void post(ComponentAlternation *c) override; + void post(ComponentAssertion *c) override; + void post(ComponentAtomicGroup *c) override; + void post(ComponentBackReference *c) override; + void post(ComponentBoundary *c) override; + void post(ComponentByte *c) override; + void post(ComponentCondReference *c) override; + void post(ComponentEmpty *c) override; + void post(ComponentEUS *c) override; + void post(ComponentRepeat *c) override; + void post(ComponentSequence *c) override; + void post(ComponentWordBoundary *c) override; + void post(UTF8ComponentClass *c) override; +}; + +} // namespace ue2 + +#endif // COMPONENTVISITOR_H diff --git a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp index adad41b308..168a2aad8e 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.cpp @@ -1,105 +1,105 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Word Boundary Assertion (\\b or \\B) - */ -#include "ComponentWordBoundary.h" -#include "buildstate.h" -#include "parse_error.h" -#include "Parser.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" - -using namespace std; - -namespace ue2 { - -ComponentWordBoundary::ComponentWordBoundary(u32 loc_in, bool neg, - const ParseMode &mode) - : loc(loc_in), position(GlushkovBuildState::POS_UNINITIALIZED), - negated(neg), ucp(mode.ucp), prefilter(false) {} - -ComponentWordBoundary::~ComponentWordBoundary() { - // empty -} - -ComponentWordBoundary * ComponentWordBoundary::clone() const { - return new ComponentWordBoundary(*this); -} - -vector<PositionInfo> ComponentWordBoundary::first() const { - vector<PositionInfo> firsts; - firsts.push_back(position); - return firsts; -} - -vector<PositionInfo> ComponentWordBoundary::last() const { - // Same as firsts - return first(); -} - -bool ComponentWordBoundary::empty() const { - return false; -} - -bool ComponentWordBoundary::repeatable() const { - return false; -} - -void ComponentWordBoundary::notePositions(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - position = builder.makePositions(1); - - if (ucp) { - assert(prefilter); // only in prefiltering mode! - if (negated) { - builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD_UCP - | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP); - } else { - builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP - | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP); - } - } else { - if (negated) { - builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD - | POS_FLAG_ASSERT_NONWORD_TO_NONWORD); - } else { - builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD - | POS_FLAG_ASSERT_NONWORD_TO_WORD); - } - } - recordPosBounds(position, position + 1); -} - -void ComponentWordBoundary::buildFollowSet(GlushkovBuildState&, - const vector<PositionInfo>&) { - // No internal connections, nowt to do -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Word Boundary Assertion (\\b or \\B) + */ +#include "ComponentWordBoundary.h" +#include "buildstate.h" +#include "parse_error.h" +#include "Parser.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" + +using namespace std; + +namespace ue2 { + +ComponentWordBoundary::ComponentWordBoundary(u32 loc_in, bool neg, + const ParseMode &mode) + : loc(loc_in), position(GlushkovBuildState::POS_UNINITIALIZED), + negated(neg), ucp(mode.ucp), prefilter(false) {} + +ComponentWordBoundary::~ComponentWordBoundary() { + // empty +} + +ComponentWordBoundary * ComponentWordBoundary::clone() const { + return new ComponentWordBoundary(*this); +} + +vector<PositionInfo> ComponentWordBoundary::first() const { + vector<PositionInfo> firsts; + firsts.push_back(position); + return firsts; +} + +vector<PositionInfo> ComponentWordBoundary::last() const { + // Same as firsts + return first(); +} + +bool ComponentWordBoundary::empty() const { + return false; +} + +bool ComponentWordBoundary::repeatable() const { + return false; +} + +void ComponentWordBoundary::notePositions(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + position = builder.makePositions(1); + + if (ucp) { + assert(prefilter); // only in prefiltering mode! + if (negated) { + builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD_UCP + | POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP); + } else { + builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP + | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP); + } + } else { + if (negated) { + builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_WORD + | POS_FLAG_ASSERT_NONWORD_TO_NONWORD); + } else { + builder.setAssertFlag(position, POS_FLAG_ASSERT_WORD_TO_NONWORD + | POS_FLAG_ASSERT_NONWORD_TO_WORD); + } + } + recordPosBounds(position, position + 1); +} + +void ComponentWordBoundary::buildFollowSet(GlushkovBuildState&, + const vector<PositionInfo>&) { + // No internal connections, nowt to do +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h index 40c317793c..8cf7654666 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h +++ b/contrib/libs/hyperscan/src/parser/ComponentWordBoundary.h @@ -1,90 +1,90 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Word Boundary Assertion (\\b or \\B) - */ - -#ifndef _RE_COMPONENTWORDBOUNDARY_H_ -#define _RE_COMPONENTWORDBOUNDARY_H_ - -#include "Component.h" -#include "position.h" - -namespace ue2 { - -struct ParseMode; - -/** \brief Encapsulates a positive (\\b) or negative (\\B) word boundary - * assertion. */ -class ComponentWordBoundary : public Component { - friend class DumpVisitor; - friend class PrintVisitor; - friend class UnsupportedVisitor; -public: - ComponentWordBoundary(u32 loc, bool negated, const ParseMode &mode); - ~ComponentWordBoundary() override; - ComponentWordBoundary *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - std::vector<PositionInfo> first() const override; - std::vector<PositionInfo> last() const override; - bool empty() const override; - bool repeatable() const override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &lastPos) override; - - void setPrefilter(bool p) { prefilter = p; } - -private: - u32 loc; //!< location in pattern for error reporting. - Position position; - bool negated; - bool ucp; - bool prefilter; //!< set by PrefilterVisitor, this is ugly - - ComponentWordBoundary(const ComponentWordBoundary &other) - : Component(other), loc(other.loc), position(other.position), - negated(other.negated), ucp(other.ucp), prefilter(other.prefilter) {} -}; - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Word Boundary Assertion (\\b or \\B) + */ + +#ifndef _RE_COMPONENTWORDBOUNDARY_H_ +#define _RE_COMPONENTWORDBOUNDARY_H_ + +#include "Component.h" +#include "position.h" + +namespace ue2 { + +struct ParseMode; + +/** \brief Encapsulates a positive (\\b) or negative (\\B) word boundary + * assertion. */ +class ComponentWordBoundary : public Component { + friend class DumpVisitor; + friend class PrintVisitor; + friend class UnsupportedVisitor; +public: + ComponentWordBoundary(u32 loc, bool negated, const ParseMode &mode); + ~ComponentWordBoundary() override; + ComponentWordBoundary *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + std::vector<PositionInfo> first() const override; + std::vector<PositionInfo> last() const override; + bool empty() const override; + bool repeatable() const override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &lastPos) override; + + void setPrefilter(bool p) { prefilter = p; } + +private: + u32 loc; //!< location in pattern for error reporting. + Position position; + bool negated; + bool ucp; + bool prefilter; //!< set by PrefilterVisitor, this is ugly + + ComponentWordBoundary(const ComponentWordBoundary &other) + : Component(other), loc(other.loc), position(other.position), + negated(other.negated), ucp(other.ucp), prefilter(other.prefilter) {} +}; + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp index 735289916c..b6413ecdb3 100644 --- a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp +++ b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.cpp @@ -1,78 +1,78 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ConstComponentVisitor.h" - -#include "AsciiComponentClass.h" -#include "ComponentAlternation.h" -#include "ComponentAssertion.h" -#include "ComponentAtomicGroup.h" -#include "ComponentBackReference.h" -#include "ComponentBoundary.h" -#include "ComponentByte.h" -#include "ComponentCondReference.h" -#include "ComponentClass.h" -#include "ComponentEmpty.h" -#include "ComponentEUS.h" -#include "ComponentRepeat.h" -#include "ComponentSequence.h" -#include "ComponentWordBoundary.h" -#include "Utf8ComponentClass.h" - -namespace ue2 { - -ConstComponentVisitor::~ConstComponentVisitor() { - // empty -} - -// Default implementations. - -DefaultConstComponentVisitor::DefaultConstComponentVisitor() {} -DefaultConstComponentVisitor::~DefaultConstComponentVisitor() {} - -#define DEFAULT_FUNCS(comp) \ - void DefaultConstComponentVisitor::pre(const comp &) {} \ - void DefaultConstComponentVisitor::during(const comp &) {} \ - void DefaultConstComponentVisitor::post(const comp &) {} - -DEFAULT_FUNCS(AsciiComponentClass) -DEFAULT_FUNCS(ComponentAlternation) -DEFAULT_FUNCS(ComponentAssertion) -DEFAULT_FUNCS(ComponentAtomicGroup) -DEFAULT_FUNCS(ComponentBackReference) -DEFAULT_FUNCS(ComponentBoundary) -DEFAULT_FUNCS(ComponentByte) -DEFAULT_FUNCS(ComponentCondReference) -DEFAULT_FUNCS(ComponentEmpty) -DEFAULT_FUNCS(ComponentEUS) -DEFAULT_FUNCS(ComponentRepeat) -DEFAULT_FUNCS(ComponentSequence) -DEFAULT_FUNCS(ComponentWordBoundary) -DEFAULT_FUNCS(UTF8ComponentClass) - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ConstComponentVisitor.h" + +#include "AsciiComponentClass.h" +#include "ComponentAlternation.h" +#include "ComponentAssertion.h" +#include "ComponentAtomicGroup.h" +#include "ComponentBackReference.h" +#include "ComponentBoundary.h" +#include "ComponentByte.h" +#include "ComponentCondReference.h" +#include "ComponentClass.h" +#include "ComponentEmpty.h" +#include "ComponentEUS.h" +#include "ComponentRepeat.h" +#include "ComponentSequence.h" +#include "ComponentWordBoundary.h" +#include "Utf8ComponentClass.h" + +namespace ue2 { + +ConstComponentVisitor::~ConstComponentVisitor() { + // empty +} + +// Default implementations. + +DefaultConstComponentVisitor::DefaultConstComponentVisitor() {} +DefaultConstComponentVisitor::~DefaultConstComponentVisitor() {} + +#define DEFAULT_FUNCS(comp) \ + void DefaultConstComponentVisitor::pre(const comp &) {} \ + void DefaultConstComponentVisitor::during(const comp &) {} \ + void DefaultConstComponentVisitor::post(const comp &) {} + +DEFAULT_FUNCS(AsciiComponentClass) +DEFAULT_FUNCS(ComponentAlternation) +DEFAULT_FUNCS(ComponentAssertion) +DEFAULT_FUNCS(ComponentAtomicGroup) +DEFAULT_FUNCS(ComponentBackReference) +DEFAULT_FUNCS(ComponentBoundary) +DEFAULT_FUNCS(ComponentByte) +DEFAULT_FUNCS(ComponentCondReference) +DEFAULT_FUNCS(ComponentEmpty) +DEFAULT_FUNCS(ComponentEUS) +DEFAULT_FUNCS(ComponentRepeat) +DEFAULT_FUNCS(ComponentSequence) +DEFAULT_FUNCS(ComponentWordBoundary) +DEFAULT_FUNCS(UTF8ComponentClass) + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h index 54026c92a9..c26f589e88 100644 --- a/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h +++ b/contrib/libs/hyperscan/src/parser/ConstComponentVisitor.h @@ -1,170 +1,170 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Visitor base class for working with the component tree. - */ - -#ifndef CONSTCOMPONENTVISITOR_H -#define CONSTCOMPONENTVISITOR_H - -namespace ue2 { - -class AsciiComponentClass; -class Component; -class ComponentAlternation; -class ComponentAssertion; -class ComponentAtomicGroup; -class ComponentBackReference; -class ComponentBoundary; -class ComponentByte; -class ComponentCondReference; -class ComponentClass; -class ComponentEmpty; -class ComponentEUS; -class ComponentRepeat; -class ComponentSequence; -class ComponentWordBoundary; -class UTF8ComponentClass; - -/** - * \brief Visitor base class for traversing an immutable component tree. - * - * Our approach to implementing the visitor pattern for traversing the - * Component tree for a pattern. This version operates on an immutable tree; - * use \ref ComponentVisitor if you need to make changes to components during - * traversal. - */ -class ConstComponentVisitor { -public: - virtual ~ConstComponentVisitor(); - - virtual void pre(const AsciiComponentClass &c) = 0; - virtual void pre(const ComponentAlternation &c) = 0; - virtual void pre(const ComponentAssertion &c) = 0; - virtual void pre(const ComponentAtomicGroup &c) = 0; - virtual void pre(const ComponentBackReference &c) = 0; - virtual void pre(const ComponentBoundary &c) = 0; - virtual void pre(const ComponentByte &c) = 0; - virtual void pre(const ComponentCondReference &c) = 0; - virtual void pre(const ComponentEmpty &c) = 0; - virtual void pre(const ComponentEUS &c) = 0; - virtual void pre(const ComponentRepeat &c) = 0; - virtual void pre(const ComponentSequence &c) = 0; - virtual void pre(const ComponentWordBoundary &c) = 0; - virtual void pre(const UTF8ComponentClass &c) = 0; - - virtual void during(const AsciiComponentClass &c) = 0; - virtual void during(const ComponentAlternation &c) = 0; - virtual void during(const ComponentAssertion &c) = 0; - virtual void during(const ComponentAtomicGroup &c) = 0; - virtual void during(const ComponentBackReference &c) = 0; - virtual void during(const ComponentBoundary &c) = 0; - virtual void during(const ComponentByte &c) = 0; - virtual void during(const ComponentCondReference &c) = 0; - virtual void during(const ComponentEmpty &c) = 0; - virtual void during(const ComponentEUS &c) = 0; - virtual void during(const ComponentRepeat &c) = 0; - virtual void during(const ComponentSequence &c) = 0; - virtual void during(const ComponentWordBoundary &c) = 0; - virtual void during(const UTF8ComponentClass &c) = 0; - - virtual void post(const AsciiComponentClass &c) = 0; - virtual void post(const ComponentAlternation &c) = 0; - virtual void post(const ComponentAssertion &c) = 0; - virtual void post(const ComponentAtomicGroup &c) = 0; - virtual void post(const ComponentBackReference &c) = 0; - virtual void post(const ComponentBoundary &c) = 0; - virtual void post(const ComponentByte &c) = 0; - virtual void post(const ComponentCondReference &c) = 0; - virtual void post(const ComponentEmpty &c) = 0; - virtual void post(const ComponentEUS &c) = 0; - virtual void post(const ComponentRepeat &c) = 0; - virtual void post(const ComponentSequence &c) = 0; - virtual void post(const ComponentWordBoundary &c) = 0; - virtual void post(const UTF8ComponentClass &c) = 0; -}; - -/** - * \brief Concrete subclass of ConstComponentVisitor with default behaviour, - * allowing you to just implement the member functions you need. - */ -class DefaultConstComponentVisitor : public ConstComponentVisitor { -public: - DefaultConstComponentVisitor(); - ~DefaultConstComponentVisitor() override; - - void pre(const AsciiComponentClass &c) override; - void pre(const ComponentAlternation &c) override; - void pre(const ComponentAssertion &c) override; - void pre(const ComponentAtomicGroup &c) override; - void pre(const ComponentBackReference &c) override; - void pre(const ComponentBoundary &c) override; - void pre(const ComponentByte &c) override; - void pre(const ComponentCondReference &c) override; - void pre(const ComponentEmpty &c) override; - void pre(const ComponentEUS &c) override; - void pre(const ComponentRepeat &c) override; - void pre(const ComponentSequence &c) override; - void pre(const ComponentWordBoundary &c) override; - void pre(const UTF8ComponentClass &c) override; - - void during(const AsciiComponentClass &c) override; - void during(const ComponentAlternation &c) override; - void during(const ComponentAssertion &c) override; - void during(const ComponentAtomicGroup &c) override; - void during(const ComponentBackReference &c) override; - void during(const ComponentBoundary &c) override; - void during(const ComponentByte &c) override; - void during(const ComponentCondReference &c) override; - void during(const ComponentEmpty &c) override; - void during(const ComponentEUS &c) override; - void during(const ComponentRepeat &c) override; - void during(const ComponentSequence &c) override; - void during(const ComponentWordBoundary &c) override; - void during(const UTF8ComponentClass &c) override; - - void post(const AsciiComponentClass &c) override; - void post(const ComponentAlternation &c) override; - void post(const ComponentAssertion &c) override; - void post(const ComponentAtomicGroup &c) override; - void post(const ComponentBackReference &c) override; - void post(const ComponentBoundary &c) override; - void post(const ComponentByte &c) override; - void post(const ComponentCondReference &c) override; - void post(const ComponentEmpty &c) override; - void post(const ComponentEUS &c) override; - void post(const ComponentRepeat &c) override; - void post(const ComponentSequence &c) override; - void post(const ComponentWordBoundary &c) override; - void post(const UTF8ComponentClass &c) override; -}; - -} // namespace ue2 - -#endif // CONSTCOMPONENTVISITOR_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Visitor base class for working with the component tree. + */ + +#ifndef CONSTCOMPONENTVISITOR_H +#define CONSTCOMPONENTVISITOR_H + +namespace ue2 { + +class AsciiComponentClass; +class Component; +class ComponentAlternation; +class ComponentAssertion; +class ComponentAtomicGroup; +class ComponentBackReference; +class ComponentBoundary; +class ComponentByte; +class ComponentCondReference; +class ComponentClass; +class ComponentEmpty; +class ComponentEUS; +class ComponentRepeat; +class ComponentSequence; +class ComponentWordBoundary; +class UTF8ComponentClass; + +/** + * \brief Visitor base class for traversing an immutable component tree. + * + * Our approach to implementing the visitor pattern for traversing the + * Component tree for a pattern. This version operates on an immutable tree; + * use \ref ComponentVisitor if you need to make changes to components during + * traversal. + */ +class ConstComponentVisitor { +public: + virtual ~ConstComponentVisitor(); + + virtual void pre(const AsciiComponentClass &c) = 0; + virtual void pre(const ComponentAlternation &c) = 0; + virtual void pre(const ComponentAssertion &c) = 0; + virtual void pre(const ComponentAtomicGroup &c) = 0; + virtual void pre(const ComponentBackReference &c) = 0; + virtual void pre(const ComponentBoundary &c) = 0; + virtual void pre(const ComponentByte &c) = 0; + virtual void pre(const ComponentCondReference &c) = 0; + virtual void pre(const ComponentEmpty &c) = 0; + virtual void pre(const ComponentEUS &c) = 0; + virtual void pre(const ComponentRepeat &c) = 0; + virtual void pre(const ComponentSequence &c) = 0; + virtual void pre(const ComponentWordBoundary &c) = 0; + virtual void pre(const UTF8ComponentClass &c) = 0; + + virtual void during(const AsciiComponentClass &c) = 0; + virtual void during(const ComponentAlternation &c) = 0; + virtual void during(const ComponentAssertion &c) = 0; + virtual void during(const ComponentAtomicGroup &c) = 0; + virtual void during(const ComponentBackReference &c) = 0; + virtual void during(const ComponentBoundary &c) = 0; + virtual void during(const ComponentByte &c) = 0; + virtual void during(const ComponentCondReference &c) = 0; + virtual void during(const ComponentEmpty &c) = 0; + virtual void during(const ComponentEUS &c) = 0; + virtual void during(const ComponentRepeat &c) = 0; + virtual void during(const ComponentSequence &c) = 0; + virtual void during(const ComponentWordBoundary &c) = 0; + virtual void during(const UTF8ComponentClass &c) = 0; + + virtual void post(const AsciiComponentClass &c) = 0; + virtual void post(const ComponentAlternation &c) = 0; + virtual void post(const ComponentAssertion &c) = 0; + virtual void post(const ComponentAtomicGroup &c) = 0; + virtual void post(const ComponentBackReference &c) = 0; + virtual void post(const ComponentBoundary &c) = 0; + virtual void post(const ComponentByte &c) = 0; + virtual void post(const ComponentCondReference &c) = 0; + virtual void post(const ComponentEmpty &c) = 0; + virtual void post(const ComponentEUS &c) = 0; + virtual void post(const ComponentRepeat &c) = 0; + virtual void post(const ComponentSequence &c) = 0; + virtual void post(const ComponentWordBoundary &c) = 0; + virtual void post(const UTF8ComponentClass &c) = 0; +}; + +/** + * \brief Concrete subclass of ConstComponentVisitor with default behaviour, + * allowing you to just implement the member functions you need. + */ +class DefaultConstComponentVisitor : public ConstComponentVisitor { +public: + DefaultConstComponentVisitor(); + ~DefaultConstComponentVisitor() override; + + void pre(const AsciiComponentClass &c) override; + void pre(const ComponentAlternation &c) override; + void pre(const ComponentAssertion &c) override; + void pre(const ComponentAtomicGroup &c) override; + void pre(const ComponentBackReference &c) override; + void pre(const ComponentBoundary &c) override; + void pre(const ComponentByte &c) override; + void pre(const ComponentCondReference &c) override; + void pre(const ComponentEmpty &c) override; + void pre(const ComponentEUS &c) override; + void pre(const ComponentRepeat &c) override; + void pre(const ComponentSequence &c) override; + void pre(const ComponentWordBoundary &c) override; + void pre(const UTF8ComponentClass &c) override; + + void during(const AsciiComponentClass &c) override; + void during(const ComponentAlternation &c) override; + void during(const ComponentAssertion &c) override; + void during(const ComponentAtomicGroup &c) override; + void during(const ComponentBackReference &c) override; + void during(const ComponentBoundary &c) override; + void during(const ComponentByte &c) override; + void during(const ComponentCondReference &c) override; + void during(const ComponentEmpty &c) override; + void during(const ComponentEUS &c) override; + void during(const ComponentRepeat &c) override; + void during(const ComponentSequence &c) override; + void during(const ComponentWordBoundary &c) override; + void during(const UTF8ComponentClass &c) override; + + void post(const AsciiComponentClass &c) override; + void post(const ComponentAlternation &c) override; + void post(const ComponentAssertion &c) override; + void post(const ComponentAtomicGroup &c) override; + void post(const ComponentBackReference &c) override; + void post(const ComponentBoundary &c) override; + void post(const ComponentByte &c) override; + void post(const ComponentCondReference &c) override; + void post(const ComponentEmpty &c) override; + void post(const ComponentEUS &c) override; + void post(const ComponentRepeat &c) override; + void post(const ComponentSequence &c) override; + void post(const ComponentWordBoundary &c) override; + void post(const UTF8ComponentClass &c) override; +}; + +} // namespace ue2 + +#endif // CONSTCOMPONENTVISITOR_H diff --git a/contrib/libs/hyperscan/src/parser/Parser.h b/contrib/libs/hyperscan/src/parser/Parser.h index f66506b396..a034a18fc1 100644 --- a/contrib/libs/hyperscan/src/parser/Parser.h +++ b/contrib/libs/hyperscan/src/parser/Parser.h @@ -1,76 +1,76 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Interface to Parser. - */ - -#ifndef _RE_PARSER_H_ -#define _RE_PARSER_H_ - -#include "ue2common.h" - -#include <memory> - -namespace ue2 { - -class Component; - -/** \brief Represents the current "mode flags" at any point in the parsing - * process. - * - * This is necessary as some modes can be changed part-way through an - * expression, such as in: - * - * /foo(?i)bar/ - */ -struct ParseMode { - ParseMode() {} - explicit ParseMode(u32 hs_flags); - - bool caseless = false; - bool dotall = false; - bool ignore_space = false; - bool multiline = false; - bool ucp = false; - bool utf8 = false; -}; - -/** \brief Parse the given regular expression into a \ref Component tree. - * - * The \a mode parameter should contain the initial mode flags, and will be - * updated by the parser if additional global flags are introduced in the - * expression (for example, via "(*UTF8)".) - * - * This call will throw a ParseError on failure. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Interface to Parser. + */ + +#ifndef _RE_PARSER_H_ +#define _RE_PARSER_H_ + +#include "ue2common.h" + +#include <memory> + +namespace ue2 { + +class Component; + +/** \brief Represents the current "mode flags" at any point in the parsing + * process. + * + * This is necessary as some modes can be changed part-way through an + * expression, such as in: + * + * /foo(?i)bar/ + */ +struct ParseMode { + ParseMode() {} + explicit ParseMode(u32 hs_flags); + + bool caseless = false; + bool dotall = false; + bool ignore_space = false; + bool multiline = false; + bool ucp = false; + bool utf8 = false; +}; + +/** \brief Parse the given regular expression into a \ref Component tree. + * + * The \a mode parameter should contain the initial mode flags, and will be + * updated by the parser if additional global flags are introduced in the + * expression (for example, via "(*UTF8)".) + * + * This call will throw a ParseError on failure. + */ std::unique_ptr<Component> parse(const char *ptr, ParseMode &mode); - -} // namespace ue2 - -#endif // _RE_PARSER_H_ + +} // namespace ue2 + +#endif // _RE_PARSER_H_ diff --git a/contrib/libs/hyperscan/src/parser/Parser.rl6 b/contrib/libs/hyperscan/src/parser/Parser.rl6 index e923549407..8643aebfc6 100644 --- a/contrib/libs/hyperscan/src/parser/Parser.rl6 +++ b/contrib/libs/hyperscan/src/parser/Parser.rl6 @@ -1,565 +1,565 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Parser code (generated with Ragel from Parser.rl). - */ - -#include "config.h" - -/* Parser.cpp is a built source, may not be in same dir as parser files */ -#include "parser/check_refs.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parser code (generated with Ragel from Parser.rl). + */ + +#include "config.h" + +/* Parser.cpp is a built source, may not be in same dir as parser files */ +#include "parser/check_refs.h" #include "parser/control_verbs.h" -#include "parser/ComponentAlternation.h" -#include "parser/ComponentAssertion.h" -#include "parser/ComponentAtomicGroup.h" -#include "parser/ComponentBackReference.h" -#include "parser/ComponentBoundary.h" -#include "parser/ComponentByte.h" -#include "parser/ComponentClass.h" -#include "parser/ComponentCondReference.h" -#include "parser/ComponentEmpty.h" -#include "parser/ComponentEUS.h" -#include "parser/Component.h" -#include "parser/ComponentRepeat.h" -#include "parser/ComponentSequence.h" -#include "parser/ComponentWordBoundary.h" -#include "parser/parse_error.h" -#include "parser/Parser.h" -#include "ue2common.h" -#include "util/compare.h" +#include "parser/ComponentAlternation.h" +#include "parser/ComponentAssertion.h" +#include "parser/ComponentAtomicGroup.h" +#include "parser/ComponentBackReference.h" +#include "parser/ComponentBoundary.h" +#include "parser/ComponentByte.h" +#include "parser/ComponentClass.h" +#include "parser/ComponentCondReference.h" +#include "parser/ComponentEmpty.h" +#include "parser/ComponentEUS.h" +#include "parser/Component.h" +#include "parser/ComponentRepeat.h" +#include "parser/ComponentSequence.h" +#include "parser/ComponentWordBoundary.h" +#include "parser/parse_error.h" +#include "parser/Parser.h" +#include "ue2common.h" +#include "util/compare.h" #include "util/flat_containers.h" -#include "util/make_unique.h" -#include "util/unicode_def.h" -#include "util/verify_types.h" - -#include <cassert> -#include <cctype> -#include <cstring> -#include <cstdlib> -#include <map> -#include <sstream> -#include <string> -#include <vector> - -using namespace std; - -namespace ue2 { - -#define PUSH_SEQUENCE do {\ - sequences.push_back(ExprState(currentSeq, (size_t)(ts - ptr), \ - mode)); \ - } while(0) -#define POP_SEQUENCE do {\ - currentSeq = sequences.back().seq; \ - mode = sequences.back().mode; \ - sequences.pop_back(); \ - } while(0) - -namespace { - -/** \brief Structure representing current state as we're parsing (current - * sequence, current options). Stored in the 'sequences' vector. */ -struct ExprState { - ExprState(ComponentSequence *seq_in, size_t offset, - const ParseMode &mode_in) : - seq(seq_in), seqOffset(offset), mode(mode_in) {} - - ComponentSequence *seq; //!< current sequence - size_t seqOffset; //!< offset seq was entered, for error reporting - ParseMode mode; //!< current mode flags -}; - -} // namespace - -static -unsigned parseAsDecimal(unsigned oct) { - // The input was parsed as octal, but should have been parsed as decimal. - // Deconstruct the octal number and reconstruct into decimal - unsigned ret = 0; - unsigned multiplier = 1; - while (oct) { - ret += (oct & 0x7) * multiplier; - oct >>= 3; - multiplier *= 10; - } - return ret; -} - -/** \brief Maximum value for a positive integer. We use INT_MAX, as that's what - * PCRE uses. */ -static constexpr u32 MAX_NUMBER = INT_MAX; - -static +#include "util/make_unique.h" +#include "util/unicode_def.h" +#include "util/verify_types.h" + +#include <cassert> +#include <cctype> +#include <cstring> +#include <cstdlib> +#include <map> +#include <sstream> +#include <string> +#include <vector> + +using namespace std; + +namespace ue2 { + +#define PUSH_SEQUENCE do {\ + sequences.push_back(ExprState(currentSeq, (size_t)(ts - ptr), \ + mode)); \ + } while(0) +#define POP_SEQUENCE do {\ + currentSeq = sequences.back().seq; \ + mode = sequences.back().mode; \ + sequences.pop_back(); \ + } while(0) + +namespace { + +/** \brief Structure representing current state as we're parsing (current + * sequence, current options). Stored in the 'sequences' vector. */ +struct ExprState { + ExprState(ComponentSequence *seq_in, size_t offset, + const ParseMode &mode_in) : + seq(seq_in), seqOffset(offset), mode(mode_in) {} + + ComponentSequence *seq; //!< current sequence + size_t seqOffset; //!< offset seq was entered, for error reporting + ParseMode mode; //!< current mode flags +}; + +} // namespace + +static +unsigned parseAsDecimal(unsigned oct) { + // The input was parsed as octal, but should have been parsed as decimal. + // Deconstruct the octal number and reconstruct into decimal + unsigned ret = 0; + unsigned multiplier = 1; + while (oct) { + ret += (oct & 0x7) * multiplier; + oct >>= 3; + multiplier *= 10; + } + return ret; +} + +/** \brief Maximum value for a positive integer. We use INT_MAX, as that's what + * PCRE uses. */ +static constexpr u32 MAX_NUMBER = INT_MAX; + +static void pushDec(u32 *acc, char raw_digit) { - assert(raw_digit >= '0' && raw_digit <= '9'); - u32 digit_val = raw_digit - '0'; - - // Ensure that we don't overflow. - u64a val = ((u64a)*acc * 10) + digit_val; - if (val > MAX_NUMBER) { - throw LocatedParseError("Number is too big"); - } - - *acc = verify_u32(val); -} - -static + assert(raw_digit >= '0' && raw_digit <= '9'); + u32 digit_val = raw_digit - '0'; + + // Ensure that we don't overflow. + u64a val = ((u64a)*acc * 10) + digit_val; + if (val > MAX_NUMBER) { + throw LocatedParseError("Number is too big"); + } + + *acc = verify_u32(val); +} + +static void pushOct(u32 *acc, char raw_digit) { - assert(raw_digit >= '0' && raw_digit <= '7'); - u32 digit_val = raw_digit - '0'; - - // Ensure that we don't overflow. - u64a val = ((u64a)*acc * 8) + digit_val; - if (val > MAX_NUMBER) { - throw LocatedParseError("Number is too big"); - } - - *acc = verify_u32(val); -} - -static -void throwInvalidRepeat(void) { - throw LocatedParseError("Invalid repeat"); -} - -static -void throwInvalidUtf8(void) { - throw ParseError("Expression is not valid UTF-8."); -} - -/** - * Adds the given child component to the parent sequence, returning a pointer - * to the new (child) "current sequence". - */ -static -ComponentSequence *enterSequence(ComponentSequence *parent, - unique_ptr<ComponentSequence> child) { - assert(parent); - assert(child); - - ComponentSequence *seq = child.get(); - parent->addComponent(move(child)); - return seq; -} - -static + assert(raw_digit >= '0' && raw_digit <= '7'); + u32 digit_val = raw_digit - '0'; + + // Ensure that we don't overflow. + u64a val = ((u64a)*acc * 8) + digit_val; + if (val > MAX_NUMBER) { + throw LocatedParseError("Number is too big"); + } + + *acc = verify_u32(val); +} + +static +void throwInvalidRepeat(void) { + throw LocatedParseError("Invalid repeat"); +} + +static +void throwInvalidUtf8(void) { + throw ParseError("Expression is not valid UTF-8."); +} + +/** + * Adds the given child component to the parent sequence, returning a pointer + * to the new (child) "current sequence". + */ +static +ComponentSequence *enterSequence(ComponentSequence *parent, + unique_ptr<ComponentSequence> child) { + assert(parent); + assert(child); + + ComponentSequence *seq = child.get(); + parent->addComponent(move(child)); + return seq; +} + +static void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { - if (mode.utf8 && mode.caseless) { - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - assert(cc); - cc->add(c); - cc->finalize(); - currentSeq->addComponent(move(cc)); - } else { - currentSeq->addComponent(getLiteralComponentClass(c, mode.caseless)); - } -} - -static -void addEscaped(ComponentSequence *currentSeq, unichar accum, - const ParseMode &mode, const char *err_msg) { - if (mode.utf8) { - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - assert(cc); - cc->add(accum); - cc->finalize(); - currentSeq->addComponent(move(cc)); - } else { - if (accum > 255) { - throw LocatedParseError(err_msg); - } + if (mode.utf8 && mode.caseless) { + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + assert(cc); + cc->add(c); + cc->finalize(); + currentSeq->addComponent(move(cc)); + } else { + currentSeq->addComponent(getLiteralComponentClass(c, mode.caseless)); + } +} + +static +void addEscaped(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode, const char *err_msg) { + if (mode.utf8) { + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + assert(cc); + cc->add(accum); + cc->finalize(); + currentSeq->addComponent(move(cc)); + } else { + if (accum > 255) { + throw LocatedParseError(err_msg); + } addLiteral(currentSeq, (char)accum, mode); - } -} - -static -void addEscapedOctal(ComponentSequence *currentSeq, unichar accum, - const ParseMode &mode) { - addEscaped(currentSeq, accum, mode, "Octal value is greater than \\377"); -} - -static -void addEscapedHex(ComponentSequence *currentSeq, unichar accum, - const ParseMode &mode) { - addEscaped(currentSeq, accum, mode, - "Hexadecimal value is greater than \\xFF"); -} - -#define SLASH_C_ERROR "\\c must be followed by an ASCII character" - -static + } +} + +static +void addEscapedOctal(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode) { + addEscaped(currentSeq, accum, mode, "Octal value is greater than \\377"); +} + +static +void addEscapedHex(ComponentSequence *currentSeq, unichar accum, + const ParseMode &mode) { + addEscaped(currentSeq, accum, mode, + "Hexadecimal value is greater than \\xFF"); +} + +#define SLASH_C_ERROR "\\c must be followed by an ASCII character" + +static u8 decodeCtrl(char raw) { - if (raw & 0x80) { - throw LocatedParseError(SLASH_C_ERROR); - } - return mytoupper(raw) ^ 0x40; -} - -static + if (raw & 0x80) { + throw LocatedParseError(SLASH_C_ERROR); + } + return mytoupper(raw) ^ 0x40; +} + +static unichar readUtf8CodePoint2c(const char *s) { auto *ts = (const u8 *)s; - assert(ts[0] >= 0xc0 && ts[0] < 0xe0); - assert(ts[1] >= 0x80 && ts[1] < 0xc0); - unichar val = ts[0] & 0x1f; - val <<= 6; - val |= ts[1] & 0x3f; - DEBUG_PRINTF("utf8 %02hhx %02hhx ->\\x{%x}\n", ts[0], - ts[1], val); - return val; -} - -static + assert(ts[0] >= 0xc0 && ts[0] < 0xe0); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + unichar val = ts[0] & 0x1f; + val <<= 6; + val |= ts[1] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], val); + return val; +} + +static unichar readUtf8CodePoint3c(const char *s) { auto *ts = (const u8 *)s; - assert(ts[0] >= 0xe0 && ts[0] < 0xf0); - assert(ts[1] >= 0x80 && ts[1] < 0xc0); - assert(ts[2] >= 0x80 && ts[2] < 0xc0); - unichar val = ts[0] & 0x0f; - val <<= 6; - val |= ts[1] & 0x3f; - val <<= 6; - val |= ts[2] & 0x3f; - DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], - ts[1], ts[2], val); - return val; -} - -static + assert(ts[0] >= 0xe0 && ts[0] < 0xf0); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + assert(ts[2] >= 0x80 && ts[2] < 0xc0); + unichar val = ts[0] & 0x0f; + val <<= 6; + val |= ts[1] & 0x3f; + val <<= 6; + val |= ts[2] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], ts[2], val); + return val; +} + +static unichar readUtf8CodePoint4c(const char *s) { auto *ts = (const u8 *)s; - assert(ts[0] >= 0xf0 && ts[0] < 0xf8); - assert(ts[1] >= 0x80 && ts[1] < 0xc0); - assert(ts[2] >= 0x80 && ts[2] < 0xc0); - assert(ts[3] >= 0x80 && ts[3] < 0xc0); - unichar val = ts[0] & 0x07; - val <<= 6; - val |= ts[1] & 0x3f; - val <<= 6; - val |= ts[2] & 0x3f; - val <<= 6; - val |= ts[3] & 0x3f; - DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], - ts[1], ts[2], ts[3], val); - return val; -} - -%%{ - machine regex; - - action throwUnsupportedEscape { - ostringstream str; + assert(ts[0] >= 0xf0 && ts[0] < 0xf8); + assert(ts[1] >= 0x80 && ts[1] < 0xc0); + assert(ts[2] >= 0x80 && ts[2] < 0xc0); + assert(ts[3] >= 0x80 && ts[3] < 0xc0); + unichar val = ts[0] & 0x07; + val <<= 6; + val |= ts[1] & 0x3f; + val <<= 6; + val |= ts[2] & 0x3f; + val <<= 6; + val |= ts[3] & 0x3f; + DEBUG_PRINTF("utf8 %02hhx %02hhx %02hhx %02hhx ->\\x{%x}\n", ts[0], + ts[1], ts[2], ts[3], val); + return val; +} + +%%{ + machine regex; + + action throwUnsupportedEscape { + ostringstream str; str << "'\\" << *(ts + 1) << "' at index " << ts - ptr << " not supported in a character class."; - throw ParseError(str.str()); - } - action unsupportedProperty { - throw LocatedParseError("Character property not supported"); - } - action clearLabel { label.clear();} - action appendLabelCharacter { label.push_back(fc);} - action clearOctAccumulator { octAccumulator = 0;} - action clearAccumulator { accumulator = 0;} - action setOctAccumulator { - octAccumulator = 0; - pushOct(&octAccumulator, fc); - } - action setDecAccumulator { - accumulator = 0; - pushDec(&accumulator, fc); - } - action clearNM { repeatN = 0; repeatM = 0; } - action appendN { pushDec(&repeatN, fc); } - action appendM { pushDec(&repeatM, fc); } - action appendAccumulatorOctDigit { pushOct(&octAccumulator, fc); } - action appendAccumulatorDecDigit { pushDec(&accumulator, fc); } - action appendAccumulatorHexDigit { - accumulator *= 16; - accumulator += fc - '0'; - } - action appendAccumulatorHexL { - accumulator *= 16; - accumulator += 10 + fc - 'a'; - } - action appendAccumulatorHexU { - accumulator *= 16; - accumulator += 10 + fc - 'A'; - } - - # enter a comment group, where we just scan for a close paren. - action enterComment { - inComment = true; - fgoto readComment; - } - - # enter an extended mode comment, where we just scan for a newline. - action enterNewlineTerminatedComment { - inComment = true; - fgoto readNewlineTerminatedComment; - } - - # enter a CAPTURING group ( e.g. '(blah)' ) - action enterCapturingGroup { - PUSH_SEQUENCE; - auto seq = ue2::make_unique<ComponentSequence>(); - seq->setCaptureIndex(groupIndex++); - currentSeq = enterSequence(currentSeq, move(seq)); - } - - # enter a NAMED CAPTURING group ( e.g. (?'<hatstand>blah) ) - action enterNamedGroup { - assert(!label.empty()); // should be guaranteed by machine - char c = *label.begin(); - if (c >= '0' && c <= '9') { - throw LocatedParseError("Group name cannot begin with a digit"); - } - if (!groupNames.insert(label).second) { - throw LocatedParseError("Two named subpatterns use the name '" + label + "'"); - } - PUSH_SEQUENCE; - auto seq = ue2::make_unique<ComponentSequence>(); - seq->setCaptureIndex(groupIndex++); - seq->setCaptureName(label); - currentSeq = enterSequence(currentSeq, move(seq)); - } - - # enter a NON-CAPTURING group where we're modifying flags - # ( e.g. '(?i:blah)' ). Standard non-capturing groups use this path - # as well. - action enterModifiedGroup { - PUSH_SEQUENCE; - mode = newMode; - currentSeq = - enterSequence(currentSeq, ue2::make_unique<ComponentSequence>()); - } - - action exitGroup { - if (sequences.empty()) { - throw LocatedParseError("Unmatched parentheses"); - } - currentSeq->finalize(); - POP_SEQUENCE; - } - action enterZWLookAhead { - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD, - ComponentAssertion::POS)); - } - action enterZWNegLookAhead { - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD, - ComponentAssertion::NEG)); - } - action enterZWLookBehind { - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND, - ComponentAssertion::POS)); - } - action enterZWNegLookBehind { - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND, - ComponentAssertion::NEG)); - } - action enterEmbeddedCode { - throw LocatedParseError("Embedded code is not supported"); - } - action enterConditionUnsupported { - throw LocatedParseError("Conditional subpattern unsupported"); - } - action enterReferenceUnsupported { - throw LocatedParseError("Subpattern reference unsupported"); - } - action enterNumberedConditionalRef { - if (accumulator == 0) { - throw LocatedParseError("Numbered reference cannot be zero"); - } - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(accumulator)); - } - action enterNamedConditionalRef { - PUSH_SEQUENCE; - assert(!label.empty()); - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(label)); - } - action enterAtomicGroup { - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentAtomicGroup>()); - } - action eatClass { - assert(!currentCls); - assert(!inCharClass); // not reentrant - currentCls = getComponentClass(mode); - inCharClass = true; - inCharClassEarly = true; - currentClsBegin = ts; - fgoto readClass; - } - action resetModifiers { - newMode = mode; - } - action applyModifiers { - mode = newMode; - currentSeq->addComponent(ue2::make_unique<ComponentEmpty>()); - } - action modifyMatchPositive { - switch (fc) { - case 'i': - newMode.caseless = true; - break; - case 'm': - newMode.multiline = true; - break; - case 's': - newMode.dotall = true; - break; - case 'x': - newMode.ignore_space = true; - break; - default: - assert(0); // this action only called for [imsx] - break; - } - } - action modifyMatchNegative { - switch (fc) { - case 'i': - newMode.caseless = false; - break; - case 'm': - newMode.multiline = false; - break; - case 's': - newMode.dotall = false; - break; - case 'x': - newMode.ignore_space = false; - break; - default: - assert(0); // this action only called for [imsx] - break; - } - } - action is_utf8 { mode.utf8 } - action is_ignore_space { mode.ignore_space } - action is_early_charclass { inCharClassEarly } - - action addNumberedBackRef { - if (accumulator == 0) { - throw LocatedParseError("Numbered reference cannot be zero"); - } - currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); - } - - action addNegativeNumberedBackRef { - // Accumulator is a negative offset. - if (accumulator == 0) { - throw LocatedParseError("Numbered reference cannot be zero"); - } - if (accumulator >= groupIndex) { - throw LocatedParseError("Invalid reference"); - } - unsigned idx = groupIndex - accumulator; - currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(idx)); - } - - action addNamedBackRef { - currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(label)); - } - - escapedOctal0 = '\\0' @clearOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit; - escapedOctal2 = '\\' [1-7] $setOctAccumulator [0-7]{1,2} $appendAccumulatorOctDigit; - escapedOctal2c = '\\' [1-7] $setOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit; - backRefIdSingle = [1-7] $setDecAccumulator; - backRefId = [1-9] $setDecAccumulator [0-9]+ $appendAccumulatorDecDigit; - escapedHex = '\\x' @clearAccumulator ([0-9] $appendAccumulatorHexDigit | [a-f] $appendAccumulatorHexL | [A-F] $appendAccumulatorHexU){0,2}; - escapedCtrl = '\\c' any?; - escapedUnsupported = '\\' [NluLU]; - repeatNM1 = '\{' @clearNM [0-9]+ $appendN ('}' @{repeatM = repeatN;} | ',' '\}' @{repeatM = ComponentRepeat::NoLimit;} | ',' [0-9]+ $appendM '}'); - - backReferenceG = '\\g' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit; - backReferenceGNegative = '\\g-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit; - backReferenceGBracket = '\\g{' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}'; - backReferenceGBracket2 = '\\g{-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}'; - backReferenceGBracketName = '\\g{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}'; - backReferenceKBracketName = '\\k{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}'; - backReferenceKBracketName2 = '\\k<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; - backReferenceKBracketName3 = '\\k\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\''; - backReferenceP = '(?P=' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')'; - - namedGroup1 = '(?<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; - namedGroup2 = '(?\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\''; - namedGroup3 = '(?P<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; - - namedConditionalRef1 = '(?(<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>)'; - namedConditionalRef2 = '(?(\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\')'; - namedConditionalRef3 = '(?(' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')'; - - numberedSubExpression = '(?' [+\-]? [0-9]+ ')'; - namedSubExpression = '(?' ('&'|'P>') [A-Za-z0-9_]+ ')'; - - positiveMatchModifiers = [imsx]+ $modifyMatchPositive; - negativeMatchModifiers = '-' [imsx]+ $modifyMatchNegative; - matchModifiers = positiveMatchModifiers ? negativeMatchModifiers ?; - - utf8_cont = 0x80..0xbf; - utf8_2c = 0xc0..0xdf utf8_cont; - utf8_3c = 0xe0..0xef utf8_cont utf8_cont; - utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; - hi_byte = 0x80..0xff; - - whitespace = [\t\n\v\f\r ]; - - ############################################################# - # Trivial parser to read Perl 5.10+ control verbs, introduced - # by '(*'. - ############################################################# - readVerb := |* - 'UTF8)' => { + throw ParseError(str.str()); + } + action unsupportedProperty { + throw LocatedParseError("Character property not supported"); + } + action clearLabel { label.clear();} + action appendLabelCharacter { label.push_back(fc);} + action clearOctAccumulator { octAccumulator = 0;} + action clearAccumulator { accumulator = 0;} + action setOctAccumulator { + octAccumulator = 0; + pushOct(&octAccumulator, fc); + } + action setDecAccumulator { + accumulator = 0; + pushDec(&accumulator, fc); + } + action clearNM { repeatN = 0; repeatM = 0; } + action appendN { pushDec(&repeatN, fc); } + action appendM { pushDec(&repeatM, fc); } + action appendAccumulatorOctDigit { pushOct(&octAccumulator, fc); } + action appendAccumulatorDecDigit { pushDec(&accumulator, fc); } + action appendAccumulatorHexDigit { + accumulator *= 16; + accumulator += fc - '0'; + } + action appendAccumulatorHexL { + accumulator *= 16; + accumulator += 10 + fc - 'a'; + } + action appendAccumulatorHexU { + accumulator *= 16; + accumulator += 10 + fc - 'A'; + } + + # enter a comment group, where we just scan for a close paren. + action enterComment { + inComment = true; + fgoto readComment; + } + + # enter an extended mode comment, where we just scan for a newline. + action enterNewlineTerminatedComment { + inComment = true; + fgoto readNewlineTerminatedComment; + } + + # enter a CAPTURING group ( e.g. '(blah)' ) + action enterCapturingGroup { + PUSH_SEQUENCE; + auto seq = ue2::make_unique<ComponentSequence>(); + seq->setCaptureIndex(groupIndex++); + currentSeq = enterSequence(currentSeq, move(seq)); + } + + # enter a NAMED CAPTURING group ( e.g. (?'<hatstand>blah) ) + action enterNamedGroup { + assert(!label.empty()); // should be guaranteed by machine + char c = *label.begin(); + if (c >= '0' && c <= '9') { + throw LocatedParseError("Group name cannot begin with a digit"); + } + if (!groupNames.insert(label).second) { + throw LocatedParseError("Two named subpatterns use the name '" + label + "'"); + } + PUSH_SEQUENCE; + auto seq = ue2::make_unique<ComponentSequence>(); + seq->setCaptureIndex(groupIndex++); + seq->setCaptureName(label); + currentSeq = enterSequence(currentSeq, move(seq)); + } + + # enter a NON-CAPTURING group where we're modifying flags + # ( e.g. '(?i:blah)' ). Standard non-capturing groups use this path + # as well. + action enterModifiedGroup { + PUSH_SEQUENCE; + mode = newMode; + currentSeq = + enterSequence(currentSeq, ue2::make_unique<ComponentSequence>()); + } + + action exitGroup { + if (sequences.empty()) { + throw LocatedParseError("Unmatched parentheses"); + } + currentSeq->finalize(); + POP_SEQUENCE; + } + action enterZWLookAhead { + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD, + ComponentAssertion::POS)); + } + action enterZWNegLookAhead { + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKAHEAD, + ComponentAssertion::NEG)); + } + action enterZWLookBehind { + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND, + ComponentAssertion::POS)); + } + action enterZWNegLookBehind { + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentAssertion>(ComponentAssertion::LOOKBEHIND, + ComponentAssertion::NEG)); + } + action enterEmbeddedCode { + throw LocatedParseError("Embedded code is not supported"); + } + action enterConditionUnsupported { + throw LocatedParseError("Conditional subpattern unsupported"); + } + action enterReferenceUnsupported { + throw LocatedParseError("Subpattern reference unsupported"); + } + action enterNumberedConditionalRef { + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(accumulator)); + } + action enterNamedConditionalRef { + PUSH_SEQUENCE; + assert(!label.empty()); + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(label)); + } + action enterAtomicGroup { + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentAtomicGroup>()); + } + action eatClass { + assert(!currentCls); + assert(!inCharClass); // not reentrant + currentCls = getComponentClass(mode); + inCharClass = true; + inCharClassEarly = true; + currentClsBegin = ts; + fgoto readClass; + } + action resetModifiers { + newMode = mode; + } + action applyModifiers { + mode = newMode; + currentSeq->addComponent(ue2::make_unique<ComponentEmpty>()); + } + action modifyMatchPositive { + switch (fc) { + case 'i': + newMode.caseless = true; + break; + case 'm': + newMode.multiline = true; + break; + case 's': + newMode.dotall = true; + break; + case 'x': + newMode.ignore_space = true; + break; + default: + assert(0); // this action only called for [imsx] + break; + } + } + action modifyMatchNegative { + switch (fc) { + case 'i': + newMode.caseless = false; + break; + case 'm': + newMode.multiline = false; + break; + case 's': + newMode.dotall = false; + break; + case 'x': + newMode.ignore_space = false; + break; + default: + assert(0); // this action only called for [imsx] + break; + } + } + action is_utf8 { mode.utf8 } + action is_ignore_space { mode.ignore_space } + action is_early_charclass { inCharClassEarly } + + action addNumberedBackRef { + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); + } + + action addNegativeNumberedBackRef { + // Accumulator is a negative offset. + if (accumulator == 0) { + throw LocatedParseError("Numbered reference cannot be zero"); + } + if (accumulator >= groupIndex) { + throw LocatedParseError("Invalid reference"); + } + unsigned idx = groupIndex - accumulator; + currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(idx)); + } + + action addNamedBackRef { + currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(label)); + } + + escapedOctal0 = '\\0' @clearOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit; + escapedOctal2 = '\\' [1-7] $setOctAccumulator [0-7]{1,2} $appendAccumulatorOctDigit; + escapedOctal2c = '\\' [1-7] $setOctAccumulator [0-7]{0,2} $appendAccumulatorOctDigit; + backRefIdSingle = [1-7] $setDecAccumulator; + backRefId = [1-9] $setDecAccumulator [0-9]+ $appendAccumulatorDecDigit; + escapedHex = '\\x' @clearAccumulator ([0-9] $appendAccumulatorHexDigit | [a-f] $appendAccumulatorHexL | [A-F] $appendAccumulatorHexU){0,2}; + escapedCtrl = '\\c' any?; + escapedUnsupported = '\\' [NluLU]; + repeatNM1 = '\{' @clearNM [0-9]+ $appendN ('}' @{repeatM = repeatN;} | ',' '\}' @{repeatM = ComponentRepeat::NoLimit;} | ',' [0-9]+ $appendM '}'); + + backReferenceG = '\\g' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit; + backReferenceGNegative = '\\g-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit; + backReferenceGBracket = '\\g{' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}'; + backReferenceGBracket2 = '\\g{-' @clearAccumulator [0-9]{1,3} $appendAccumulatorDecDigit '}'; + backReferenceGBracketName = '\\g{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}'; + backReferenceKBracketName = '\\k{' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '}'; + backReferenceKBracketName2 = '\\k<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; + backReferenceKBracketName3 = '\\k\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\''; + backReferenceP = '(?P=' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')'; + + namedGroup1 = '(?<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; + namedGroup2 = '(?\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\''; + namedGroup3 = '(?P<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>'; + + namedConditionalRef1 = '(?(<' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '>)'; + namedConditionalRef2 = '(?(\'' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter '\')'; + namedConditionalRef3 = '(?(' @clearLabel [A-Za-z0-9_]+ $appendLabelCharacter ')'; + + numberedSubExpression = '(?' [+\-]? [0-9]+ ')'; + namedSubExpression = '(?' ('&'|'P>') [A-Za-z0-9_]+ ')'; + + positiveMatchModifiers = [imsx]+ $modifyMatchPositive; + negativeMatchModifiers = '-' [imsx]+ $modifyMatchNegative; + matchModifiers = positiveMatchModifiers ? negativeMatchModifiers ?; + + utf8_cont = 0x80..0xbf; + utf8_2c = 0xc0..0xdf utf8_cont; + utf8_3c = 0xe0..0xef utf8_cont utf8_cont; + utf8_4c = 0xf0..0xf7 utf8_cont utf8_cont utf8_cont; + hi_byte = 0x80..0xff; + + whitespace = [\t\n\v\f\r ]; + + ############################################################# + # Trivial parser to read Perl 5.10+ control verbs, introduced + # by '(*'. + ############################################################# + readVerb := |* + 'UTF8)' => { throw LocatedParseError("(*UTF8) must be at start of " "expression, encountered"); - }; + }; 'UTF)' => { throw LocatedParseError("(*UTF) must be at start of " "expression, encountered"); }; - 'UCP)' => { + 'UCP)' => { throw LocatedParseError("(*UCP) must be at start of " "expression, encountered"); - }; + }; # Use the control verb mini-parser to report an error for this # unsupported/unknown verb. [^)]+ ')' => { @@ -568,414 +568,414 @@ unichar readUtf8CodePoint4c(const char *s) { read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); assert(0); // Should have thrown a parse error. throw LocatedParseError("Unknown control verb"); - }; - any => { - throw LocatedParseError("Unknown control verb"); - }; - *|; - - ############################################################# - # Parser to read UCP - ############################################################# - readUCP := |* - 'C' => { currentCls->add(CLASS_UCP_C, negated); fret; }; - 'Cc' => { currentCls->add(CLASS_UCP_CC, negated); fret; }; - 'Cf' => { currentCls->add(CLASS_UCP_CF, negated); fret; }; - 'Cn' => { currentCls->add(CLASS_UCP_CN, negated); fret; }; - 'Co' => { currentCls->add(CLASS_UCP_CO, negated); fret; }; - 'Cs' => { currentCls->add(CLASS_UCP_CS, negated); fret; }; - 'L' => { currentCls->add(CLASS_UCP_L, negated); fret; }; - 'Ll' => { currentCls->add(CLASS_UCP_LL, negated); fret; }; - 'Lm' => { currentCls->add(CLASS_UCP_LM, negated); fret; }; - 'Lo' => { currentCls->add(CLASS_UCP_LO, negated); fret; }; - 'Lt' => { currentCls->add(CLASS_UCP_LT, negated); fret; }; - 'Lu' => { currentCls->add(CLASS_UCP_LU, negated); fret; }; - 'L&' => { currentCls->add(CLASS_UCP_L_AND, negated); fret; }; - 'M' => { currentCls->add(CLASS_UCP_M, negated); fret; }; - 'Mc' => { currentCls->add(CLASS_UCP_MC, negated); fret; }; - 'Me' => { currentCls->add(CLASS_UCP_ME, negated); fret; }; - 'Mn' => { currentCls->add(CLASS_UCP_MN, negated); fret; }; - 'N' => { currentCls->add(CLASS_UCP_N, negated); fret; }; - 'Nd' => { currentCls->add(CLASS_UCP_ND, negated); fret; }; - 'Nl' => { currentCls->add(CLASS_UCP_NL, negated); fret; }; - 'No' => { currentCls->add(CLASS_UCP_NO, negated); fret; }; - 'P' => { currentCls->add(CLASS_UCP_P, negated); fret; }; - 'Pc' => { currentCls->add(CLASS_UCP_PC, negated); fret; }; - 'Pd' => { currentCls->add(CLASS_UCP_PD, negated); fret; }; - 'Pe' => { currentCls->add(CLASS_UCP_PE, negated); fret; }; - 'Pf' => { currentCls->add(CLASS_UCP_PF, negated); fret; }; - 'Pi' => { currentCls->add(CLASS_UCP_PI, negated); fret; }; - 'Po' => { currentCls->add(CLASS_UCP_PO, negated); fret; }; - 'Ps' => { currentCls->add(CLASS_UCP_PS, negated); fret; }; - 'S' => { currentCls->add(CLASS_UCP_S, negated); fret; }; - 'Sc' => { currentCls->add(CLASS_UCP_SC, negated); fret; }; - 'Sk' => { currentCls->add(CLASS_UCP_SK, negated); fret; }; - 'Sm' => { currentCls->add(CLASS_UCP_SM, negated); fret; }; - 'So' => { currentCls->add(CLASS_UCP_SO, negated); fret; }; - 'Z' => { currentCls->add(CLASS_UCP_Z, negated); fret; }; - 'Zl' => { currentCls->add(CLASS_UCP_ZL, negated); fret; }; - 'Zp' => { currentCls->add(CLASS_UCP_ZP, negated); fret; }; - 'Zs' => { currentCls->add(CLASS_UCP_ZS, negated); fret; }; - 'Xan' => { currentCls->add(CLASS_UCP_XAN, negated); fret; }; - 'Xps' => { currentCls->add(CLASS_UCP_XPS, negated); fret; }; - 'Xsp' => { currentCls->add(CLASS_UCP_XSP, negated); fret; }; - 'Xwd' => { currentCls->add(CLASS_UCP_XWD, negated); fret; }; - 'Arabic' => { currentCls->add(CLASS_SCRIPT_ARABIC, negated); fret; }; - 'Armenian' => { currentCls->add(CLASS_SCRIPT_ARMENIAN, negated); fret; }; - 'Avestan' => { currentCls->add(CLASS_SCRIPT_AVESTAN, negated); fret; }; - 'Balinese' => { currentCls->add(CLASS_SCRIPT_BALINESE, negated); fret; }; - 'Bamum' => { currentCls->add(CLASS_SCRIPT_BAMUM, negated); fret; }; - 'Batak' => { currentCls->add(CLASS_SCRIPT_BATAK, negated); fret; }; - 'Bengali' => { currentCls->add(CLASS_SCRIPT_BENGALI, negated); fret; }; - 'Bopomofo' => { currentCls->add(CLASS_SCRIPT_BOPOMOFO, negated); fret; }; - 'Brahmi' => { currentCls->add(CLASS_SCRIPT_BRAHMI, negated); fret; }; - 'Braille' => { currentCls->add(CLASS_SCRIPT_BRAILLE, negated); fret; }; - 'Buginese' => { currentCls->add(CLASS_SCRIPT_BUGINESE, negated); fret; }; - 'Buhid' => { currentCls->add(CLASS_SCRIPT_BUHID, negated); fret; }; - 'Canadian_Aboriginal' => { currentCls->add(CLASS_SCRIPT_CANADIAN_ABORIGINAL, negated); fret; }; - 'Carian' => { currentCls->add(CLASS_SCRIPT_CARIAN, negated); fret; }; - 'Cham' => { currentCls->add(CLASS_SCRIPT_CHAM, negated); fret; }; - 'Cherokee' => { currentCls->add(CLASS_SCRIPT_CHEROKEE, negated); fret; }; - 'Common' => { currentCls->add(CLASS_SCRIPT_COMMON, negated); fret; }; - 'Coptic' => { currentCls->add(CLASS_SCRIPT_COPTIC, negated); fret; }; - 'Cuneiform' => { currentCls->add(CLASS_SCRIPT_CUNEIFORM, negated); fret; }; - 'Cypriot' => { currentCls->add(CLASS_SCRIPT_CYPRIOT, negated); fret; }; - 'Cyrillic' => { currentCls->add(CLASS_SCRIPT_CYRILLIC, negated); fret; }; - 'Deseret' => { currentCls->add(CLASS_SCRIPT_DESERET, negated); fret; }; - 'Devanagari' => { currentCls->add(CLASS_SCRIPT_DEVANAGARI, negated); fret; }; - 'Egyptian_Hieroglyphs' => { currentCls->add(CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, negated); fret; }; - 'Ethiopic' => { currentCls->add(CLASS_SCRIPT_ETHIOPIC, negated); fret; }; - 'Georgian' => { currentCls->add(CLASS_SCRIPT_GEORGIAN, negated); fret; }; - 'Glagolitic' => { currentCls->add(CLASS_SCRIPT_GLAGOLITIC, negated); fret; }; - 'Gothic' => { currentCls->add(CLASS_SCRIPT_GOTHIC, negated); fret; }; - 'Greek' => { currentCls->add(CLASS_SCRIPT_GREEK, negated); fret; }; - 'Gujarati' => { currentCls->add(CLASS_SCRIPT_GUJARATI, negated); fret; }; - 'Gurmukhi' => { currentCls->add(CLASS_SCRIPT_GURMUKHI, negated); fret; }; - 'Han' => { currentCls->add(CLASS_SCRIPT_HAN, negated); fret; }; - 'Hangul' => { currentCls->add(CLASS_SCRIPT_HANGUL, negated); fret; }; - 'Hanunoo' => { currentCls->add(CLASS_SCRIPT_HANUNOO, negated); fret; }; - 'Hebrew' => { currentCls->add(CLASS_SCRIPT_HEBREW, negated); fret; }; - 'Hiragana' => { currentCls->add(CLASS_SCRIPT_HIRAGANA, negated); fret; }; - 'Imperial_Aramaic' => { currentCls->add(CLASS_SCRIPT_IMPERIAL_ARAMAIC, negated); fret; }; - 'Inherited' => { currentCls->add(CLASS_SCRIPT_INHERITED, negated); fret; }; - 'Inscriptional_Pahlavi' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, negated); fret; }; - 'Inscriptional_Parthian' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, negated); fret; }; - 'Javanese' => { currentCls->add(CLASS_SCRIPT_JAVANESE, negated); fret; }; - 'Kaithi' => { currentCls->add(CLASS_SCRIPT_KAITHI, negated); fret; }; - 'Kannada' => { currentCls->add(CLASS_SCRIPT_KANNADA, negated); fret; }; - 'Katakana' => { currentCls->add(CLASS_SCRIPT_KATAKANA, negated); fret; }; - 'Kayah_Li' => { currentCls->add(CLASS_SCRIPT_KAYAH_LI, negated); fret; }; - 'Kharoshthi' => { currentCls->add(CLASS_SCRIPT_KHAROSHTHI, negated); fret; }; - 'Khmer' => { currentCls->add(CLASS_SCRIPT_KHMER, negated); fret; }; - 'Lao' => { currentCls->add(CLASS_SCRIPT_LAO, negated); fret; }; - 'Latin' => { currentCls->add(CLASS_SCRIPT_LATIN, negated); fret; }; - 'Lepcha' => { currentCls->add(CLASS_SCRIPT_LEPCHA, negated); fret; }; - 'Limbu' => { currentCls->add(CLASS_SCRIPT_LIMBU, negated); fret; }; - 'Linear_B' => { currentCls->add(CLASS_SCRIPT_LINEAR_B, negated); fret; }; - 'Lisu' => { currentCls->add(CLASS_SCRIPT_LISU, negated); fret; }; - 'Lycian' => { currentCls->add(CLASS_SCRIPT_LYCIAN, negated); fret; }; - 'Lydian' => { currentCls->add(CLASS_SCRIPT_LYDIAN, negated); fret; }; - 'Malayalam' => { currentCls->add(CLASS_SCRIPT_MALAYALAM, negated); fret; }; - 'Mandaic' => { currentCls->add(CLASS_SCRIPT_MANDAIC, negated); fret; }; - 'Meetei_Mayek' => { currentCls->add(CLASS_SCRIPT_MEETEI_MAYEK, negated); fret; }; - 'Mongolian' => { currentCls->add(CLASS_SCRIPT_MONGOLIAN, negated); fret; }; - 'Myanmar' => { currentCls->add(CLASS_SCRIPT_MYANMAR, negated); fret; }; - 'New_Tai_Lue' => { currentCls->add(CLASS_SCRIPT_NEW_TAI_LUE, negated); fret; }; - 'Nko' => { currentCls->add(CLASS_SCRIPT_NKO, negated); fret; }; - 'Ogham' => { currentCls->add(CLASS_SCRIPT_OGHAM, negated); fret; }; - 'Ol_Chiki' => { currentCls->add(CLASS_SCRIPT_OL_CHIKI, negated); fret; }; - 'Old_Italic' => { currentCls->add(CLASS_SCRIPT_OLD_ITALIC, negated); fret; }; - 'Old_Persian' => { currentCls->add(CLASS_SCRIPT_OLD_PERSIAN, negated); fret; }; - 'Old_South_Arabian' => { currentCls->add(CLASS_SCRIPT_OLD_SOUTH_ARABIAN, negated); fret; }; - 'Old_Turkic' => { currentCls->add(CLASS_SCRIPT_OLD_TURKIC, negated); fret; }; - 'Oriya' => { currentCls->add(CLASS_SCRIPT_ORIYA, negated); fret; }; - 'Osmanya' => { currentCls->add(CLASS_SCRIPT_OSMANYA, negated); fret; }; - 'Phags_Pa' => { currentCls->add(CLASS_SCRIPT_PHAGS_PA, negated); fret; }; - 'Phoenician' => { currentCls->add(CLASS_SCRIPT_PHOENICIAN, negated); fret; }; - 'Rejang' => { currentCls->add(CLASS_SCRIPT_REJANG, negated); fret; }; - 'Runic' => { currentCls->add(CLASS_SCRIPT_RUNIC, negated); fret; }; - 'Samaritan' => { currentCls->add(CLASS_SCRIPT_SAMARITAN, negated); fret; }; - 'Saurashtra' => { currentCls->add(CLASS_SCRIPT_SAURASHTRA, negated); fret; }; - 'Shavian' => { currentCls->add(CLASS_SCRIPT_SHAVIAN, negated); fret; }; - 'Sinhala' => { currentCls->add(CLASS_SCRIPT_SINHALA, negated); fret; }; - 'Sundanese' => { currentCls->add(CLASS_SCRIPT_SUNDANESE, negated); fret; }; - 'Syloti_Nagri' => { currentCls->add(CLASS_SCRIPT_SYLOTI_NAGRI, negated); fret; }; - 'Syriac' => { currentCls->add(CLASS_SCRIPT_SYRIAC, negated); fret; }; - 'Tagalog' => { currentCls->add(CLASS_SCRIPT_TAGALOG, negated); fret; }; - 'Tagbanwa' => { currentCls->add(CLASS_SCRIPT_TAGBANWA, negated); fret; }; - 'Tai_Le' => { currentCls->add(CLASS_SCRIPT_TAI_LE, negated); fret; }; - 'Tai_Tham' => { currentCls->add(CLASS_SCRIPT_TAI_THAM, negated); fret; }; - 'Tai_Viet' => { currentCls->add(CLASS_SCRIPT_TAI_VIET, negated); fret; }; - 'Tamil' => { currentCls->add(CLASS_SCRIPT_TAMIL, negated); fret; }; - 'Telugu' => { currentCls->add(CLASS_SCRIPT_TELUGU, negated); fret; }; - 'Thaana' => { currentCls->add(CLASS_SCRIPT_THAANA, negated); fret; }; - 'Thai' => { currentCls->add(CLASS_SCRIPT_THAI, negated); fret; }; - 'Tibetan' => { currentCls->add(CLASS_SCRIPT_TIBETAN, negated); fret; }; - 'Tifinagh' => { currentCls->add(CLASS_SCRIPT_TIFINAGH, negated); fret; }; - 'Ugaritic' => { currentCls->add(CLASS_SCRIPT_UGARITIC, negated); fret; }; - 'Vai' => { currentCls->add(CLASS_SCRIPT_VAI, negated); fret; }; - 'Yi' => { currentCls->add(CLASS_SCRIPT_YI, negated); fret; }; - 'Any' => { currentCls->add(CLASS_UCP_ANY, negated); fret; }; - any => { throw LocatedParseError("Unknown property"); }; - *|; - - readBracedUCP := ('{' - ('^' ${ negated = !negated; }) ? - ([^^] ${ fhold; fcall readUCP; }) - '}' ${ if (!inCharClass) { // not inside [..] - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }) - $^{ throw LocatedParseError("Malformed property"); }; - - readUCPSingle := |* - 'C' => { - currentCls->add(CLASS_UCP_C, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - 'L' => { - currentCls->add(CLASS_UCP_L, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - 'M' => { - currentCls->add(CLASS_UCP_M, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - 'N' => { - currentCls->add(CLASS_UCP_N, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } + }; + any => { + throw LocatedParseError("Unknown control verb"); + }; + *|; + + ############################################################# + # Parser to read UCP + ############################################################# + readUCP := |* + 'C' => { currentCls->add(CLASS_UCP_C, negated); fret; }; + 'Cc' => { currentCls->add(CLASS_UCP_CC, negated); fret; }; + 'Cf' => { currentCls->add(CLASS_UCP_CF, negated); fret; }; + 'Cn' => { currentCls->add(CLASS_UCP_CN, negated); fret; }; + 'Co' => { currentCls->add(CLASS_UCP_CO, negated); fret; }; + 'Cs' => { currentCls->add(CLASS_UCP_CS, negated); fret; }; + 'L' => { currentCls->add(CLASS_UCP_L, negated); fret; }; + 'Ll' => { currentCls->add(CLASS_UCP_LL, negated); fret; }; + 'Lm' => { currentCls->add(CLASS_UCP_LM, negated); fret; }; + 'Lo' => { currentCls->add(CLASS_UCP_LO, negated); fret; }; + 'Lt' => { currentCls->add(CLASS_UCP_LT, negated); fret; }; + 'Lu' => { currentCls->add(CLASS_UCP_LU, negated); fret; }; + 'L&' => { currentCls->add(CLASS_UCP_L_AND, negated); fret; }; + 'M' => { currentCls->add(CLASS_UCP_M, negated); fret; }; + 'Mc' => { currentCls->add(CLASS_UCP_MC, negated); fret; }; + 'Me' => { currentCls->add(CLASS_UCP_ME, negated); fret; }; + 'Mn' => { currentCls->add(CLASS_UCP_MN, negated); fret; }; + 'N' => { currentCls->add(CLASS_UCP_N, negated); fret; }; + 'Nd' => { currentCls->add(CLASS_UCP_ND, negated); fret; }; + 'Nl' => { currentCls->add(CLASS_UCP_NL, negated); fret; }; + 'No' => { currentCls->add(CLASS_UCP_NO, negated); fret; }; + 'P' => { currentCls->add(CLASS_UCP_P, negated); fret; }; + 'Pc' => { currentCls->add(CLASS_UCP_PC, negated); fret; }; + 'Pd' => { currentCls->add(CLASS_UCP_PD, negated); fret; }; + 'Pe' => { currentCls->add(CLASS_UCP_PE, negated); fret; }; + 'Pf' => { currentCls->add(CLASS_UCP_PF, negated); fret; }; + 'Pi' => { currentCls->add(CLASS_UCP_PI, negated); fret; }; + 'Po' => { currentCls->add(CLASS_UCP_PO, negated); fret; }; + 'Ps' => { currentCls->add(CLASS_UCP_PS, negated); fret; }; + 'S' => { currentCls->add(CLASS_UCP_S, negated); fret; }; + 'Sc' => { currentCls->add(CLASS_UCP_SC, negated); fret; }; + 'Sk' => { currentCls->add(CLASS_UCP_SK, negated); fret; }; + 'Sm' => { currentCls->add(CLASS_UCP_SM, negated); fret; }; + 'So' => { currentCls->add(CLASS_UCP_SO, negated); fret; }; + 'Z' => { currentCls->add(CLASS_UCP_Z, negated); fret; }; + 'Zl' => { currentCls->add(CLASS_UCP_ZL, negated); fret; }; + 'Zp' => { currentCls->add(CLASS_UCP_ZP, negated); fret; }; + 'Zs' => { currentCls->add(CLASS_UCP_ZS, negated); fret; }; + 'Xan' => { currentCls->add(CLASS_UCP_XAN, negated); fret; }; + 'Xps' => { currentCls->add(CLASS_UCP_XPS, negated); fret; }; + 'Xsp' => { currentCls->add(CLASS_UCP_XSP, negated); fret; }; + 'Xwd' => { currentCls->add(CLASS_UCP_XWD, negated); fret; }; + 'Arabic' => { currentCls->add(CLASS_SCRIPT_ARABIC, negated); fret; }; + 'Armenian' => { currentCls->add(CLASS_SCRIPT_ARMENIAN, negated); fret; }; + 'Avestan' => { currentCls->add(CLASS_SCRIPT_AVESTAN, negated); fret; }; + 'Balinese' => { currentCls->add(CLASS_SCRIPT_BALINESE, negated); fret; }; + 'Bamum' => { currentCls->add(CLASS_SCRIPT_BAMUM, negated); fret; }; + 'Batak' => { currentCls->add(CLASS_SCRIPT_BATAK, negated); fret; }; + 'Bengali' => { currentCls->add(CLASS_SCRIPT_BENGALI, negated); fret; }; + 'Bopomofo' => { currentCls->add(CLASS_SCRIPT_BOPOMOFO, negated); fret; }; + 'Brahmi' => { currentCls->add(CLASS_SCRIPT_BRAHMI, negated); fret; }; + 'Braille' => { currentCls->add(CLASS_SCRIPT_BRAILLE, negated); fret; }; + 'Buginese' => { currentCls->add(CLASS_SCRIPT_BUGINESE, negated); fret; }; + 'Buhid' => { currentCls->add(CLASS_SCRIPT_BUHID, negated); fret; }; + 'Canadian_Aboriginal' => { currentCls->add(CLASS_SCRIPT_CANADIAN_ABORIGINAL, negated); fret; }; + 'Carian' => { currentCls->add(CLASS_SCRIPT_CARIAN, negated); fret; }; + 'Cham' => { currentCls->add(CLASS_SCRIPT_CHAM, negated); fret; }; + 'Cherokee' => { currentCls->add(CLASS_SCRIPT_CHEROKEE, negated); fret; }; + 'Common' => { currentCls->add(CLASS_SCRIPT_COMMON, negated); fret; }; + 'Coptic' => { currentCls->add(CLASS_SCRIPT_COPTIC, negated); fret; }; + 'Cuneiform' => { currentCls->add(CLASS_SCRIPT_CUNEIFORM, negated); fret; }; + 'Cypriot' => { currentCls->add(CLASS_SCRIPT_CYPRIOT, negated); fret; }; + 'Cyrillic' => { currentCls->add(CLASS_SCRIPT_CYRILLIC, negated); fret; }; + 'Deseret' => { currentCls->add(CLASS_SCRIPT_DESERET, negated); fret; }; + 'Devanagari' => { currentCls->add(CLASS_SCRIPT_DEVANAGARI, negated); fret; }; + 'Egyptian_Hieroglyphs' => { currentCls->add(CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS, negated); fret; }; + 'Ethiopic' => { currentCls->add(CLASS_SCRIPT_ETHIOPIC, negated); fret; }; + 'Georgian' => { currentCls->add(CLASS_SCRIPT_GEORGIAN, negated); fret; }; + 'Glagolitic' => { currentCls->add(CLASS_SCRIPT_GLAGOLITIC, negated); fret; }; + 'Gothic' => { currentCls->add(CLASS_SCRIPT_GOTHIC, negated); fret; }; + 'Greek' => { currentCls->add(CLASS_SCRIPT_GREEK, negated); fret; }; + 'Gujarati' => { currentCls->add(CLASS_SCRIPT_GUJARATI, negated); fret; }; + 'Gurmukhi' => { currentCls->add(CLASS_SCRIPT_GURMUKHI, negated); fret; }; + 'Han' => { currentCls->add(CLASS_SCRIPT_HAN, negated); fret; }; + 'Hangul' => { currentCls->add(CLASS_SCRIPT_HANGUL, negated); fret; }; + 'Hanunoo' => { currentCls->add(CLASS_SCRIPT_HANUNOO, negated); fret; }; + 'Hebrew' => { currentCls->add(CLASS_SCRIPT_HEBREW, negated); fret; }; + 'Hiragana' => { currentCls->add(CLASS_SCRIPT_HIRAGANA, negated); fret; }; + 'Imperial_Aramaic' => { currentCls->add(CLASS_SCRIPT_IMPERIAL_ARAMAIC, negated); fret; }; + 'Inherited' => { currentCls->add(CLASS_SCRIPT_INHERITED, negated); fret; }; + 'Inscriptional_Pahlavi' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI, negated); fret; }; + 'Inscriptional_Parthian' => { currentCls->add(CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN, negated); fret; }; + 'Javanese' => { currentCls->add(CLASS_SCRIPT_JAVANESE, negated); fret; }; + 'Kaithi' => { currentCls->add(CLASS_SCRIPT_KAITHI, negated); fret; }; + 'Kannada' => { currentCls->add(CLASS_SCRIPT_KANNADA, negated); fret; }; + 'Katakana' => { currentCls->add(CLASS_SCRIPT_KATAKANA, negated); fret; }; + 'Kayah_Li' => { currentCls->add(CLASS_SCRIPT_KAYAH_LI, negated); fret; }; + 'Kharoshthi' => { currentCls->add(CLASS_SCRIPT_KHAROSHTHI, negated); fret; }; + 'Khmer' => { currentCls->add(CLASS_SCRIPT_KHMER, negated); fret; }; + 'Lao' => { currentCls->add(CLASS_SCRIPT_LAO, negated); fret; }; + 'Latin' => { currentCls->add(CLASS_SCRIPT_LATIN, negated); fret; }; + 'Lepcha' => { currentCls->add(CLASS_SCRIPT_LEPCHA, negated); fret; }; + 'Limbu' => { currentCls->add(CLASS_SCRIPT_LIMBU, negated); fret; }; + 'Linear_B' => { currentCls->add(CLASS_SCRIPT_LINEAR_B, negated); fret; }; + 'Lisu' => { currentCls->add(CLASS_SCRIPT_LISU, negated); fret; }; + 'Lycian' => { currentCls->add(CLASS_SCRIPT_LYCIAN, negated); fret; }; + 'Lydian' => { currentCls->add(CLASS_SCRIPT_LYDIAN, negated); fret; }; + 'Malayalam' => { currentCls->add(CLASS_SCRIPT_MALAYALAM, negated); fret; }; + 'Mandaic' => { currentCls->add(CLASS_SCRIPT_MANDAIC, negated); fret; }; + 'Meetei_Mayek' => { currentCls->add(CLASS_SCRIPT_MEETEI_MAYEK, negated); fret; }; + 'Mongolian' => { currentCls->add(CLASS_SCRIPT_MONGOLIAN, negated); fret; }; + 'Myanmar' => { currentCls->add(CLASS_SCRIPT_MYANMAR, negated); fret; }; + 'New_Tai_Lue' => { currentCls->add(CLASS_SCRIPT_NEW_TAI_LUE, negated); fret; }; + 'Nko' => { currentCls->add(CLASS_SCRIPT_NKO, negated); fret; }; + 'Ogham' => { currentCls->add(CLASS_SCRIPT_OGHAM, negated); fret; }; + 'Ol_Chiki' => { currentCls->add(CLASS_SCRIPT_OL_CHIKI, negated); fret; }; + 'Old_Italic' => { currentCls->add(CLASS_SCRIPT_OLD_ITALIC, negated); fret; }; + 'Old_Persian' => { currentCls->add(CLASS_SCRIPT_OLD_PERSIAN, negated); fret; }; + 'Old_South_Arabian' => { currentCls->add(CLASS_SCRIPT_OLD_SOUTH_ARABIAN, negated); fret; }; + 'Old_Turkic' => { currentCls->add(CLASS_SCRIPT_OLD_TURKIC, negated); fret; }; + 'Oriya' => { currentCls->add(CLASS_SCRIPT_ORIYA, negated); fret; }; + 'Osmanya' => { currentCls->add(CLASS_SCRIPT_OSMANYA, negated); fret; }; + 'Phags_Pa' => { currentCls->add(CLASS_SCRIPT_PHAGS_PA, negated); fret; }; + 'Phoenician' => { currentCls->add(CLASS_SCRIPT_PHOENICIAN, negated); fret; }; + 'Rejang' => { currentCls->add(CLASS_SCRIPT_REJANG, negated); fret; }; + 'Runic' => { currentCls->add(CLASS_SCRIPT_RUNIC, negated); fret; }; + 'Samaritan' => { currentCls->add(CLASS_SCRIPT_SAMARITAN, negated); fret; }; + 'Saurashtra' => { currentCls->add(CLASS_SCRIPT_SAURASHTRA, negated); fret; }; + 'Shavian' => { currentCls->add(CLASS_SCRIPT_SHAVIAN, negated); fret; }; + 'Sinhala' => { currentCls->add(CLASS_SCRIPT_SINHALA, negated); fret; }; + 'Sundanese' => { currentCls->add(CLASS_SCRIPT_SUNDANESE, negated); fret; }; + 'Syloti_Nagri' => { currentCls->add(CLASS_SCRIPT_SYLOTI_NAGRI, negated); fret; }; + 'Syriac' => { currentCls->add(CLASS_SCRIPT_SYRIAC, negated); fret; }; + 'Tagalog' => { currentCls->add(CLASS_SCRIPT_TAGALOG, negated); fret; }; + 'Tagbanwa' => { currentCls->add(CLASS_SCRIPT_TAGBANWA, negated); fret; }; + 'Tai_Le' => { currentCls->add(CLASS_SCRIPT_TAI_LE, negated); fret; }; + 'Tai_Tham' => { currentCls->add(CLASS_SCRIPT_TAI_THAM, negated); fret; }; + 'Tai_Viet' => { currentCls->add(CLASS_SCRIPT_TAI_VIET, negated); fret; }; + 'Tamil' => { currentCls->add(CLASS_SCRIPT_TAMIL, negated); fret; }; + 'Telugu' => { currentCls->add(CLASS_SCRIPT_TELUGU, negated); fret; }; + 'Thaana' => { currentCls->add(CLASS_SCRIPT_THAANA, negated); fret; }; + 'Thai' => { currentCls->add(CLASS_SCRIPT_THAI, negated); fret; }; + 'Tibetan' => { currentCls->add(CLASS_SCRIPT_TIBETAN, negated); fret; }; + 'Tifinagh' => { currentCls->add(CLASS_SCRIPT_TIFINAGH, negated); fret; }; + 'Ugaritic' => { currentCls->add(CLASS_SCRIPT_UGARITIC, negated); fret; }; + 'Vai' => { currentCls->add(CLASS_SCRIPT_VAI, negated); fret; }; + 'Yi' => { currentCls->add(CLASS_SCRIPT_YI, negated); fret; }; + 'Any' => { currentCls->add(CLASS_UCP_ANY, negated); fret; }; + any => { throw LocatedParseError("Unknown property"); }; + *|; + + readBracedUCP := ('{' + ('^' ${ negated = !negated; }) ? + ([^^] ${ fhold; fcall readUCP; }) + '}' ${ if (!inCharClass) { // not inside [..] + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }) + $^{ throw LocatedParseError("Malformed property"); }; + + readUCPSingle := |* + 'C' => { + currentCls->add(CLASS_UCP_C, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + 'L' => { + currentCls->add(CLASS_UCP_L, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } fret; - }; - 'P' => { - currentCls->add(CLASS_UCP_P, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - 'S' => { - currentCls->add(CLASS_UCP_S, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - 'Z' => { - currentCls->add(CLASS_UCP_Z, negated); - if (!inCharClass) { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - } - fret; - }; - - any => { throw LocatedParseError("Unknown property"); }; - *|; - charClassGuts := |* - # We don't support POSIX collating elements (neither does PCRE - # or Perl). These look like [.ch.] or [=ch=]. - '\[\.' ( '\\]' | [^\]] )* '\.\]' | - '\[=' ( '\\]' | [^\]] )* '=\]' => { - throw LocatedParseError("Unsupported POSIX collating " - "element"); - }; - # Named sets - # Adding these may cause the charclass to close, hence the - # finalized check - UE-2276 - '[:alnum:]' => { - currentCls->add(CLASS_ALNUM, false); - }; - '[:^alnum:]' => { - currentCls->add(CLASS_ALNUM, true); - }; - '[:alpha:]' => { - currentCls->add(CLASS_ALPHA, false); - }; - '[:^alpha:]' => { - currentCls->add(CLASS_ALPHA, true); - }; - '[:ascii:]' => { - currentCls->add(CLASS_ASCII, false); - }; - '[:^ascii:]' => { - currentCls->add(CLASS_ASCII, true); - }; - '[:blank:]' => { - currentCls->add(CLASS_BLANK, false); - }; - '[:^blank:]' => { - currentCls->add(CLASS_BLANK, true); - }; - '[:cntrl:]' => { - currentCls->add(CLASS_CNTRL, false); - }; - '[:^cntrl:]' => { - currentCls->add(CLASS_CNTRL, true); - }; - '[:digit:]' => { - currentCls->add(CLASS_DIGIT, false); - }; - '[:^digit:]' => { - currentCls->add(CLASS_DIGIT, true); - }; - '[:graph:]' => { - currentCls->add(CLASS_GRAPH, false); - }; - '[:^graph:]' => { - currentCls->add(CLASS_GRAPH, true); - }; - '[:lower:]' => { - currentCls->add(CLASS_LOWER, false); - }; - '[:^lower:]' => { - currentCls->add(CLASS_LOWER, true); - }; - '[:print:]' => { - currentCls->add(CLASS_PRINT, false); - }; - '[:^print:]' => { - currentCls->add(CLASS_PRINT, true); - }; - '[:punct:]' => { - currentCls->add(CLASS_PUNCT, false); - }; - '[:^punct:]' => { - currentCls->add(CLASS_PUNCT, true); - }; - # Posix SPACE covers 9, 10, 11, 12, 13, 32 - '[:space:]' => { - currentCls->add(CLASS_SPACE, false); - }; - '[:^space:]' => { - currentCls->add(CLASS_SPACE, true); - }; - '[:upper:]' => { - currentCls->add(CLASS_UPPER, false); - }; - '[:^upper:]' => { - currentCls->add(CLASS_UPPER, true); - }; - '[:word:]' => { - currentCls->add(CLASS_WORD, false); - }; - '[:^word:]' => { - currentCls->add(CLASS_WORD, true); - }; - '[:xdigit:]' => { - currentCls->add(CLASS_XDIGIT, false); - }; - '[:^xdigit:]' => { - currentCls->add(CLASS_XDIGIT, true); - }; - # Anything else between "[:" and ":]" is an invalid POSIX class. - # Note that "\]" counts as a literal char here. - '\[:' ( '\\]' | [^\]] )* ':\]' => { - throw LocatedParseError("Invalid POSIX named class"); - }; - '\\Q' => { - fcall readQuotedClass; - }; - '\\E' => { /*noop*/}; - # Backspace (this is only valid for \b in char classes) - '\\b' => { - currentCls->add('\x08'); - }; - # Tab - '\\t' => { - currentCls->add('\x09'); - }; - # Newline - '\\n' => { - currentCls->add('\x0a'); - }; - # Carriage return - '\\r' => { - currentCls->add('\x0d'); - }; - # Form feed - '\\f' => { - currentCls->add('\x0c'); - }; - # Bell - '\\a' => { - currentCls->add('\x07'); - }; - # Escape - '\\e' => { - currentCls->add('\x1b'); - }; - # Horizontal whitespace - '\\h' => { - currentCls->add(CLASS_HORZ, false); - }; - # Not horizontal whitespace - '\\H' => { - currentCls->add(CLASS_HORZ, true); - }; - # Vertical whitespace - '\\v' => { - currentCls->add(CLASS_VERT, false); - }; - # Not vertical whitespace - '\\V' => { - currentCls->add(CLASS_VERT, true); - }; - - '\\p{' => { - negated = false; - fhold; - fcall readBracedUCP; - }; - - '\\p' any => { - negated = false; - fhold; - fcall readUCPSingle; - }; - - '\\P{' => { - negated = true; - fhold; - fcall readBracedUCP; - }; - - '\\P'any => { - negated = true; - fhold; - fcall readUCPSingle; - }; - - '\\P' => { throw LocatedParseError("Malformed property"); }; - '\\p' => { throw LocatedParseError("Malformed property"); }; - - # Octal - escapedOctal0 => { - currentCls->add(octAccumulator); - }; - escapedOctal2c => { - currentCls->add(octAccumulator); - }; - - '\\o{' [0-7]+ '}' => { + }; + 'M' => { + currentCls->add(CLASS_UCP_M, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + 'N' => { + currentCls->add(CLASS_UCP_N, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + 'P' => { + currentCls->add(CLASS_UCP_P, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + 'S' => { + currentCls->add(CLASS_UCP_S, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + 'Z' => { + currentCls->add(CLASS_UCP_Z, negated); + if (!inCharClass) { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + } + fret; + }; + + any => { throw LocatedParseError("Unknown property"); }; + *|; + charClassGuts := |* + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }; + # Named sets + # Adding these may cause the charclass to close, hence the + # finalized check - UE-2276 + '[:alnum:]' => { + currentCls->add(CLASS_ALNUM, false); + }; + '[:^alnum:]' => { + currentCls->add(CLASS_ALNUM, true); + }; + '[:alpha:]' => { + currentCls->add(CLASS_ALPHA, false); + }; + '[:^alpha:]' => { + currentCls->add(CLASS_ALPHA, true); + }; + '[:ascii:]' => { + currentCls->add(CLASS_ASCII, false); + }; + '[:^ascii:]' => { + currentCls->add(CLASS_ASCII, true); + }; + '[:blank:]' => { + currentCls->add(CLASS_BLANK, false); + }; + '[:^blank:]' => { + currentCls->add(CLASS_BLANK, true); + }; + '[:cntrl:]' => { + currentCls->add(CLASS_CNTRL, false); + }; + '[:^cntrl:]' => { + currentCls->add(CLASS_CNTRL, true); + }; + '[:digit:]' => { + currentCls->add(CLASS_DIGIT, false); + }; + '[:^digit:]' => { + currentCls->add(CLASS_DIGIT, true); + }; + '[:graph:]' => { + currentCls->add(CLASS_GRAPH, false); + }; + '[:^graph:]' => { + currentCls->add(CLASS_GRAPH, true); + }; + '[:lower:]' => { + currentCls->add(CLASS_LOWER, false); + }; + '[:^lower:]' => { + currentCls->add(CLASS_LOWER, true); + }; + '[:print:]' => { + currentCls->add(CLASS_PRINT, false); + }; + '[:^print:]' => { + currentCls->add(CLASS_PRINT, true); + }; + '[:punct:]' => { + currentCls->add(CLASS_PUNCT, false); + }; + '[:^punct:]' => { + currentCls->add(CLASS_PUNCT, true); + }; + # Posix SPACE covers 9, 10, 11, 12, 13, 32 + '[:space:]' => { + currentCls->add(CLASS_SPACE, false); + }; + '[:^space:]' => { + currentCls->add(CLASS_SPACE, true); + }; + '[:upper:]' => { + currentCls->add(CLASS_UPPER, false); + }; + '[:^upper:]' => { + currentCls->add(CLASS_UPPER, true); + }; + '[:word:]' => { + currentCls->add(CLASS_WORD, false); + }; + '[:^word:]' => { + currentCls->add(CLASS_WORD, true); + }; + '[:xdigit:]' => { + currentCls->add(CLASS_XDIGIT, false); + }; + '[:^xdigit:]' => { + currentCls->add(CLASS_XDIGIT, true); + }; + # Anything else between "[:" and ":]" is an invalid POSIX class. + # Note that "\]" counts as a literal char here. + '\[:' ( '\\]' | [^\]] )* ':\]' => { + throw LocatedParseError("Invalid POSIX named class"); + }; + '\\Q' => { + fcall readQuotedClass; + }; + '\\E' => { /*noop*/}; + # Backspace (this is only valid for \b in char classes) + '\\b' => { + currentCls->add('\x08'); + }; + # Tab + '\\t' => { + currentCls->add('\x09'); + }; + # Newline + '\\n' => { + currentCls->add('\x0a'); + }; + # Carriage return + '\\r' => { + currentCls->add('\x0d'); + }; + # Form feed + '\\f' => { + currentCls->add('\x0c'); + }; + # Bell + '\\a' => { + currentCls->add('\x07'); + }; + # Escape + '\\e' => { + currentCls->add('\x1b'); + }; + # Horizontal whitespace + '\\h' => { + currentCls->add(CLASS_HORZ, false); + }; + # Not horizontal whitespace + '\\H' => { + currentCls->add(CLASS_HORZ, true); + }; + # Vertical whitespace + '\\v' => { + currentCls->add(CLASS_VERT, false); + }; + # Not vertical whitespace + '\\V' => { + currentCls->add(CLASS_VERT, true); + }; + + '\\p{' => { + negated = false; + fhold; + fcall readBracedUCP; + }; + + '\\p' any => { + negated = false; + fhold; + fcall readUCPSingle; + }; + + '\\P{' => { + negated = true; + fhold; + fcall readBracedUCP; + }; + + '\\P'any => { + negated = true; + fhold; + fcall readUCPSingle; + }; + + '\\P' => { throw LocatedParseError("Malformed property"); }; + '\\p' => { throw LocatedParseError("Malformed property"); }; + + # Octal + escapedOctal0 => { + currentCls->add(octAccumulator); + }; + escapedOctal2c => { + currentCls->add(octAccumulator); + }; + + '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); unsigned long val; try { @@ -983,29 +983,29 @@ unichar readUtf8CodePoint4c(const char *s) { } catch (const std::out_of_range &) { val = MAX_UNICODE + 1; } - if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { - throw LocatedParseError("Value in \\o{...} sequence is too large"); - } - currentCls->add((unichar)val); - }; - - # And for when it goes wrong - '\\o' => { - throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); - }; - - # Hex - escapedHex => { - currentCls->add(accumulator); - }; - # not a back-ref, not octal, just PCRE madness - '\\' [89] => { - // whatever we found here - currentCls->add(*(ts + 1)); - - }; - # Unicode Hex - '\\x{' xdigit+ '}' => { + if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { + throw LocatedParseError("Value in \\o{...} sequence is too large"); + } + currentCls->add((unichar)val); + }; + + # And for when it goes wrong + '\\o' => { + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); + }; + + # Hex + escapedHex => { + currentCls->add(accumulator); + }; + # not a back-ref, not octal, just PCRE madness + '\\' [89] => { + // whatever we found here + currentCls->add(*(ts + 1)); + + }; + # Unicode Hex + '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); unsigned long val; try { @@ -1013,148 +1013,148 @@ unichar readUtf8CodePoint4c(const char *s) { } catch (const std::out_of_range &) { val = MAX_UNICODE + 1; } - if (val > MAX_UNICODE) { - throw LocatedParseError("Value in \\x{...} sequence is too large"); - } - currentCls->add((unichar)val); - }; - # And for when it goes wrong - '\\x{' => { - throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); - }; - # Control characters - escapedCtrl => { - if (te - ts < 3) { - assert(te - ts == 2); - throw LocatedParseError(SLASH_C_ERROR); - } else { - assert(te - ts == 3); - currentCls->add(decodeCtrl(ts[2])); - } - }; - # Word character - '\\w' => { - currentCls->add(CLASS_WORD, false); - }; - # Non word character - '\\W' => { - currentCls->add(CLASS_WORD, true); - }; - # Whitespace character (except VT) - '\\s' => { - currentCls->add(CLASS_SPACE, false); - }; - # Non whitespace character - '\\S' => { - currentCls->add(CLASS_SPACE, true); - }; - # Digit character - '\\d' => { - currentCls->add(CLASS_DIGIT, false); - }; - # Non digit character - '\\D' => { - currentCls->add(CLASS_DIGIT, true); - }; - '\-' => { - currentCls->addDash(); - }; - - # A bunch of unsupported (for now) escapes - escapedUnsupported - '\\X' => throwUnsupportedEscape; - - # PCRE appears to discard escaped g in a char class (a backref bug?) - '\\g' => throwUnsupportedEscape; - - # the too-hard basket: UE-944, UE-1134, UE-1157 - # many escaped single char literals shold be benign, but PCRE - # breaks with them when adding to ranges, so unless they have - # defined special meaning in a char-class we reject them to be - # safe. - '\\' alpha => throwUnsupportedEscape; - - '\\' any => { - // add the literal char - currentCls->add(*(ts + 1)); - }; - - #unicode chars - utf8_2c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint2c(ts)); - }; - - utf8_3c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint3c(ts)); - }; - - utf8_4c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint4c(ts)); - }; - - hi_byte when is_utf8 => { - assert(mode.utf8); - throwInvalidUtf8(); - }; - - # Literal character - (any - ']') => { + if (val > MAX_UNICODE) { + throw LocatedParseError("Value in \\x{...} sequence is too large"); + } + currentCls->add((unichar)val); + }; + # And for when it goes wrong + '\\x{' => { + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }; + # Control characters + escapedCtrl => { + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + currentCls->add(decodeCtrl(ts[2])); + } + }; + # Word character + '\\w' => { + currentCls->add(CLASS_WORD, false); + }; + # Non word character + '\\W' => { + currentCls->add(CLASS_WORD, true); + }; + # Whitespace character (except VT) + '\\s' => { + currentCls->add(CLASS_SPACE, false); + }; + # Non whitespace character + '\\S' => { + currentCls->add(CLASS_SPACE, true); + }; + # Digit character + '\\d' => { + currentCls->add(CLASS_DIGIT, false); + }; + # Non digit character + '\\D' => { + currentCls->add(CLASS_DIGIT, true); + }; + '\-' => { + currentCls->addDash(); + }; + + # A bunch of unsupported (for now) escapes + escapedUnsupported - '\\X' => throwUnsupportedEscape; + + # PCRE appears to discard escaped g in a char class (a backref bug?) + '\\g' => throwUnsupportedEscape; + + # the too-hard basket: UE-944, UE-1134, UE-1157 + # many escaped single char literals shold be benign, but PCRE + # breaks with them when adding to ranges, so unless they have + # defined special meaning in a char-class we reject them to be + # safe. + '\\' alpha => throwUnsupportedEscape; + + '\\' any => { + // add the literal char + currentCls->add(*(ts + 1)); + }; + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint2c(ts)); + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint3c(ts)); + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint4c(ts)); + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + + # Literal character + (any - ']') => { currentCls->add((u8)*ts); - }; - - ']' => { - currentCls->finalize(); - currentSeq->addComponent(move(currentCls)); - inCharClass = false; - fgoto main; - }; - *|; - - ############################################################# - # Parser to read stuff from a character class - ############################################################# - readClass := |* - # A caret at the beginning of the class means that the rest of the - # class is negated. - '\^' when is_early_charclass => { - if (currentCls->isNegated()) { - // Already seen a caret; the second one is not a meta-character. - inCharClassEarly = false; - fhold; fgoto charClassGuts; - } else { - currentCls->negate(); - // Note: we cannot switch off inCharClassEarly here, as /[^]]/ - // needs to use the right square bracket path below. - } - }; - # A right square bracket before anything "real" is interpreted as a - # literal right square bracket. - ']' when is_early_charclass => { - currentCls->add(']'); - inCharClassEarly = false; - }; - # if we hit a quote before anything "real", handle it - '\\Q' => { fcall readQuotedClass; }; - '\\E' => { /*noop*/}; - - # time for the real work to happen - any => { - inCharClassEarly = false; - fhold; - fgoto charClassGuts; - }; - *|; - - ############################################################# - # Parser to read a quoted literal - ############################################################# - readQuotedLiteral := |* - # Escape sequence - '\\E' => { - fgoto main; - }; + }; + + ']' => { + currentCls->finalize(); + currentSeq->addComponent(move(currentCls)); + inCharClass = false; + fgoto main; + }; + *|; + + ############################################################# + # Parser to read stuff from a character class + ############################################################# + readClass := |* + # A caret at the beginning of the class means that the rest of the + # class is negated. + '\^' when is_early_charclass => { + if (currentCls->isNegated()) { + // Already seen a caret; the second one is not a meta-character. + inCharClassEarly = false; + fhold; fgoto charClassGuts; + } else { + currentCls->negate(); + // Note: we cannot switch off inCharClassEarly here, as /[^]]/ + // needs to use the right square bracket path below. + } + }; + # A right square bracket before anything "real" is interpreted as a + # literal right square bracket. + ']' when is_early_charclass => { + currentCls->add(']'); + inCharClassEarly = false; + }; + # if we hit a quote before anything "real", handle it + '\\Q' => { fcall readQuotedClass; }; + '\\E' => { /*noop*/}; + + # time for the real work to happen + any => { + inCharClassEarly = false; + fhold; + fgoto charClassGuts; + }; + *|; + + ############################################################# + # Parser to read a quoted literal + ############################################################# + readQuotedLiteral := |* + # Escape sequence + '\\E' => { + fgoto main; + }; #unicode chars utf8_2c when is_utf8 => { @@ -1189,20 +1189,20 @@ unichar readUtf8CodePoint4c(const char *s) { throwInvalidUtf8(); }; - # Literal character - any => { - addLiteral(currentSeq, *ts, mode); - }; - *|; - - ############################################################# - # Parser to read a quoted class - ############################################################# - readQuotedClass := |* - # Escape sequence - '\\E' => { - fret; - }; + # Literal character + any => { + addLiteral(currentSeq, *ts, mode); + }; + *|; + + ############################################################# + # Parser to read a quoted class + ############################################################# + readQuotedClass := |* + # Escape sequence + '\\E' => { + fret; + }; #unicode chars utf8_2c when is_utf8 => { @@ -1228,337 +1228,337 @@ unichar readUtf8CodePoint4c(const char *s) { throwInvalidUtf8(); }; - # Literal character - any => { - currentCls->add(*ts); - inCharClassEarly = false; - }; - *|; - - - ############################################################# - # Parser to read (and ignore) a comment block - ############################################################# - readComment := |* - # Right paren - '\)' => { inComment = false; fgoto main; }; - - # absolutely everything gets ignored until we see a right - # paren - any; - *|; - - ############################################################# - # Parser to read (and ignore) a newline-terminated comment - # block - ############################################################# - readNewlineTerminatedComment := |* - '\n' => { inComment = false; fgoto main; }; - - # absolutely everything gets ignored until we see a - # newline - any; - *|; - - ############################################################# - # Parser for standard components - ############################################################# - main := |* - ############################################################# - # Standard components - ############################################################# - # Begin capturing group (non-capturing handled further down) - '\(' => enterCapturingGroup; - # End group - '\)' => exitGroup; - # Mark alternation - '\|' => { - currentSeq->addAlternation(); - }; - # POSIX named elements should only be used inside a class. Note - # that we need to be able to reject /[:\]:]/ here. - '\[:' ( '\\]' | [^\]] )* ':\]' => { - throw LocatedParseError("POSIX named classes are only " - "supported inside a class"); - }; - # We don't support POSIX collating elements (neither does PCRE - # or Perl). These look like [.ch.] or [=ch=]. - '\[\.' ( '\\]' | [^\]] )* '\.\]' | - '\[=' ( '\\]' | [^\]] )* '=\]' => { - throw LocatedParseError("Unsupported POSIX collating " - "element"); - }; - # Begin eating characters for class - '\[' => eatClass; - # Begin quoted literal - '\\Q' => { - fgoto readQuotedLiteral; - }; + # Literal character + any => { + currentCls->add(*ts); + inCharClassEarly = false; + }; + *|; + + + ############################################################# + # Parser to read (and ignore) a comment block + ############################################################# + readComment := |* + # Right paren + '\)' => { inComment = false; fgoto main; }; + + # absolutely everything gets ignored until we see a right + # paren + any; + *|; + + ############################################################# + # Parser to read (and ignore) a newline-terminated comment + # block + ############################################################# + readNewlineTerminatedComment := |* + '\n' => { inComment = false; fgoto main; }; + + # absolutely everything gets ignored until we see a + # newline + any; + *|; + + ############################################################# + # Parser for standard components + ############################################################# + main := |* + ############################################################# + # Standard components + ############################################################# + # Begin capturing group (non-capturing handled further down) + '\(' => enterCapturingGroup; + # End group + '\)' => exitGroup; + # Mark alternation + '\|' => { + currentSeq->addAlternation(); + }; + # POSIX named elements should only be used inside a class. Note + # that we need to be able to reject /[:\]:]/ here. + '\[:' ( '\\]' | [^\]] )* ':\]' => { + throw LocatedParseError("POSIX named classes are only " + "supported inside a class"); + }; + # We don't support POSIX collating elements (neither does PCRE + # or Perl). These look like [.ch.] or [=ch=]. + '\[\.' ( '\\]' | [^\]] )* '\.\]' | + '\[=' ( '\\]' | [^\]] )* '=\]' => { + throw LocatedParseError("Unsupported POSIX collating " + "element"); + }; + # Begin eating characters for class + '\[' => eatClass; + # Begin quoted literal + '\\Q' => { + fgoto readQuotedLiteral; + }; # An \E that is not preceded by a \Q is ignored '\\E' => { /* noop */ }; - # Match any character - '\.' => { - currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode)); - }; - # Match one byte - '\\C' => { - if (mode.utf8) { - throw LocatedParseError("\\C is unsupported in UTF8"); - } - currentSeq->addComponent(ue2::make_unique<ComponentByte>()); - }; - # Match 0 or more times (greedy) - '\*' => { - if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_GREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 0 or more times (non-greedy) - '\*\?' => { - if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_NONGREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 0 or more times (possessive) - '\*\+' => { - if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_POSSESSIVE)) { - throwInvalidRepeat(); - } - }; - # Match 1 or more times (greedy) - '\+' => { - if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_GREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 1 or more times (non-greedy) - '\+\?' => { - if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_NONGREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 1 or more times (possessive) - '\+\+' => { - if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_POSSESSIVE)) { - throwInvalidRepeat(); - } - }; - # Match 0 or 1 times (greedy) - '\?' => { - if (!currentSeq->addRepeat( - 0, 1, ComponentRepeat::REPEAT_GREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 0 or 1 times (non-greedy) - '\?\?' => { - if (!currentSeq->addRepeat( - 0, 1, ComponentRepeat::REPEAT_NONGREEDY)) { - throwInvalidRepeat(); - } - }; - # Match 0 or 1 times (possessive) - '\?\+' => { - if (!currentSeq->addRepeat( - 0, 1, ComponentRepeat::REPEAT_POSSESSIVE)) { - throwInvalidRepeat(); - } - }; - # Match {n}|{n,}|{n,m} times (greedy) - repeatNM1 => { - if (repeatN > repeatM || repeatM == 0) { - throwInvalidRepeat(); - } else if (!currentSeq->addRepeat( - repeatN, repeatM, - ComponentRepeat::REPEAT_GREEDY)) { - throwInvalidRepeat(); - } - }; - # Match {n}|{n,}|{n,m} times (non-greedy) - repeatNM1 '\?' => { - if (repeatN > repeatM || repeatM == 0) { - throwInvalidRepeat(); - } else if (!currentSeq->addRepeat( - repeatN, repeatM, - ComponentRepeat::REPEAT_NONGREEDY)) { - throwInvalidRepeat(); - } - }; - # Match {n}|{n,}|{n,m} times (possessive) - repeatNM1 '\+' => { - if (repeatN > repeatM || repeatM == 0) { - throwInvalidRepeat(); - } else if (!currentSeq->addRepeat( - repeatN, repeatM, - ComponentRepeat::REPEAT_POSSESSIVE)) { - throwInvalidRepeat(); - } - }; - - # In ignore_space mode, an unescaped # character introduces a - # comment that runs until the next newline or the end of the - # pattern. - '\#' when is_ignore_space => enterNewlineTerminatedComment; - - # Perl 5.10 Special Backtracking Control Verbs: we support - # UTF8/UCP, none of the others - '(*' [^)] => { fhold; fcall readVerb; }; - - # Earlier parser code checked for the terminating NULL and exited - # explicitly. - '\0' => { assert(0); fbreak; }; - - ############################################################# - # Boundaries - ############################################################# - - # Start of data; also after internal newline in multiline mode - '\^' => { - auto bound = mode.multiline ? ComponentBoundary::BEGIN_LINE - : ComponentBoundary::BEGIN_STRING; - currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); - }; - # End of data (with optional internal newline); also before - # internal newline in multiline mode - '\$' => { - auto bound = mode.multiline ? ComponentBoundary::END_LINE - : ComponentBoundary::END_STRING_OPTIONAL_LF; - currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); - }; - # Beginning of data - '\\A' => { - auto bound = ComponentBoundary::BEGIN_STRING; - currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); - }; - # End of data (with optional internal newline) - '\\Z' => { - auto bound = ComponentBoundary::END_STRING_OPTIONAL_LF; - currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); - }; - # End of data - '\\z' => { - auto bound = ComponentBoundary::END_STRING; - currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); - }; - # Word boundary - '\\b' => { - currentSeq->addComponent( - ue2::make_unique<ComponentWordBoundary>(ts - ptr, false, mode)); - }; - # Non-word boundary - '\\B' => { - currentSeq->addComponent( - ue2::make_unique<ComponentWordBoundary>(ts - ptr, true, mode)); - }; - - ############################################################# - # Escaped chars - ############################################################# - - # Tab - '\\t' => { - addLiteral(currentSeq, '\x09', mode); - }; - # Newline - '\\n' => { - addLiteral(currentSeq, '\x0a', mode); - }; - # Carriage return - '\\r' => { - addLiteral(currentSeq, '\x0d', mode); - }; - # Form feed - '\\f' => { - addLiteral(currentSeq, '\x0c', mode); - }; - # Bell - '\\a' => { - addLiteral(currentSeq, '\x07', mode); - }; - # Escape - '\\e' => { - addLiteral(currentSeq, '\x1b', mode); - }; - # Octal - escapedOctal0 => { - addLiteral(currentSeq, octAccumulator, mode); - }; - escapedOctal2 => { - // If there are enough capturing sub expressions, this may be - // a back reference - accumulator = parseAsDecimal(octAccumulator); - if (accumulator < groupIndex) { - currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); - } else { - addEscapedOctal(currentSeq, octAccumulator, mode); - } - }; - - # Numeric back reference - # everything less than 8 is a straight up back ref, even if - # it is a forwards backward reference (aieeee!) - # Note that \8 and \9 are the literal chars '8' and '9'. - '\\' backRefIdSingle => addNumberedBackRef; - # otherwise we need to munge through the possible backref - '\\' backRefId => { - // if there are enough left parens to this point, back ref - if (accumulator < groupIndex) { - currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); - } else { - // Otherwise, we interpret the first three digits as an - // octal escape, and the remaining characters stand for - // themselves as literals. + # Match any character + '\.' => { + currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode)); + }; + # Match one byte + '\\C' => { + if (mode.utf8) { + throw LocatedParseError("\\C is unsupported in UTF8"); + } + currentSeq->addComponent(ue2::make_unique<ComponentByte>()); + }; + # Match 0 or more times (greedy) + '\*' => { + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 0 or more times (non-greedy) + '\*\?' => { + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 0 or more times (possessive) + '\*\+' => { + if (!currentSeq->addRepeat(0, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }; + # Match 1 or more times (greedy) + '\+' => { + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 1 or more times (non-greedy) + '\+\?' => { + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 1 or more times (possessive) + '\+\+' => { + if (!currentSeq->addRepeat(1, ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }; + # Match 0 or 1 times (greedy) + '\?' => { + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 0 or 1 times (non-greedy) + '\?\?' => { + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }; + # Match 0 or 1 times (possessive) + '\?\+' => { + if (!currentSeq->addRepeat( + 0, 1, ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }; + # Match {n}|{n,}|{n,m} times (greedy) + repeatNM1 => { + if (repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_GREEDY)) { + throwInvalidRepeat(); + } + }; + # Match {n}|{n,}|{n,m} times (non-greedy) + repeatNM1 '\?' => { + if (repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_NONGREEDY)) { + throwInvalidRepeat(); + } + }; + # Match {n}|{n,}|{n,m} times (possessive) + repeatNM1 '\+' => { + if (repeatN > repeatM || repeatM == 0) { + throwInvalidRepeat(); + } else if (!currentSeq->addRepeat( + repeatN, repeatM, + ComponentRepeat::REPEAT_POSSESSIVE)) { + throwInvalidRepeat(); + } + }; + + # In ignore_space mode, an unescaped # character introduces a + # comment that runs until the next newline or the end of the + # pattern. + '\#' when is_ignore_space => enterNewlineTerminatedComment; + + # Perl 5.10 Special Backtracking Control Verbs: we support + # UTF8/UCP, none of the others + '(*' [^)] => { fhold; fcall readVerb; }; + + # Earlier parser code checked for the terminating NULL and exited + # explicitly. + '\0' => { assert(0); fbreak; }; + + ############################################################# + # Boundaries + ############################################################# + + # Start of data; also after internal newline in multiline mode + '\^' => { + auto bound = mode.multiline ? ComponentBoundary::BEGIN_LINE + : ComponentBoundary::BEGIN_STRING; + currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); + }; + # End of data (with optional internal newline); also before + # internal newline in multiline mode + '\$' => { + auto bound = mode.multiline ? ComponentBoundary::END_LINE + : ComponentBoundary::END_STRING_OPTIONAL_LF; + currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); + }; + # Beginning of data + '\\A' => { + auto bound = ComponentBoundary::BEGIN_STRING; + currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); + }; + # End of data (with optional internal newline) + '\\Z' => { + auto bound = ComponentBoundary::END_STRING_OPTIONAL_LF; + currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); + }; + # End of data + '\\z' => { + auto bound = ComponentBoundary::END_STRING; + currentSeq->addComponent(ue2::make_unique<ComponentBoundary>(bound)); + }; + # Word boundary + '\\b' => { + currentSeq->addComponent( + ue2::make_unique<ComponentWordBoundary>(ts - ptr, false, mode)); + }; + # Non-word boundary + '\\B' => { + currentSeq->addComponent( + ue2::make_unique<ComponentWordBoundary>(ts - ptr, true, mode)); + }; + + ############################################################# + # Escaped chars + ############################################################# + + # Tab + '\\t' => { + addLiteral(currentSeq, '\x09', mode); + }; + # Newline + '\\n' => { + addLiteral(currentSeq, '\x0a', mode); + }; + # Carriage return + '\\r' => { + addLiteral(currentSeq, '\x0d', mode); + }; + # Form feed + '\\f' => { + addLiteral(currentSeq, '\x0c', mode); + }; + # Bell + '\\a' => { + addLiteral(currentSeq, '\x07', mode); + }; + # Escape + '\\e' => { + addLiteral(currentSeq, '\x1b', mode); + }; + # Octal + escapedOctal0 => { + addLiteral(currentSeq, octAccumulator, mode); + }; + escapedOctal2 => { + // If there are enough capturing sub expressions, this may be + // a back reference + accumulator = parseAsDecimal(octAccumulator); + if (accumulator < groupIndex) { + currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); + } else { + addEscapedOctal(currentSeq, octAccumulator, mode); + } + }; + + # Numeric back reference + # everything less than 8 is a straight up back ref, even if + # it is a forwards backward reference (aieeee!) + # Note that \8 and \9 are the literal chars '8' and '9'. + '\\' backRefIdSingle => addNumberedBackRef; + # otherwise we need to munge through the possible backref + '\\' backRefId => { + // if there are enough left parens to this point, back ref + if (accumulator < groupIndex) { + currentSeq->addComponent(ue2::make_unique<ComponentBackReference>(accumulator)); + } else { + // Otherwise, we interpret the first three digits as an + // octal escape, and the remaining characters stand for + // themselves as literals. const char *s = ts; - unsigned int accum = 0; - unsigned int oct_digits = 0; + unsigned int accum = 0; + unsigned int oct_digits = 0; assert(*s == '\\'); // token starts at backslash for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) { u8 digit = *s - '0'; - if (digit < 8) { - accum = digit + accum * 8; - } else { - break; - } - } - - if (oct_digits > 0) { - addEscapedOctal(currentSeq, accum, mode); - } - - // And then the rest of the digits, if any, are literal. + if (digit < 8) { + accum = digit + accum * 8; + } else { + break; + } + } + + if (oct_digits > 0) { + addEscapedOctal(currentSeq, accum, mode); + } + + // And then the rest of the digits, if any, are literal. for (; s < te; ++s) { addLiteral(currentSeq, *s, mode); - } - } - }; - backReferenceG => addNumberedBackRef; - backReferenceGNegative => addNegativeNumberedBackRef; - backReferenceGBracket => addNumberedBackRef; - backReferenceGBracket2 => addNegativeNumberedBackRef; - backReferenceGBracketName => addNamedBackRef; - backReferenceKBracketName => addNamedBackRef; - backReferenceKBracketName2 => addNamedBackRef; - backReferenceKBracketName3 => addNamedBackRef; - backReferenceP => addNamedBackRef; - # Oniguruma - either angle braces or single quotes for this one - ('\\g<' [^>]*? '>'|'\\g\'' [^\']*? '\'') => { - ostringstream str; - str << "Onigiruma subroutine call at index " << ts - ptr << - " not supported."; - throw ParseError(str.str()); - }; - # Fallthrough: a \g that hasn't been caught by one of the above - # is invalid syntax. Without this rule, we would accept /A\g/. - '\\g' => { - throw LocatedParseError("Invalid reference after \\g"); - }; - '\\o{' [0-7]+ '}' => { + } + } + }; + backReferenceG => addNumberedBackRef; + backReferenceGNegative => addNegativeNumberedBackRef; + backReferenceGBracket => addNumberedBackRef; + backReferenceGBracket2 => addNegativeNumberedBackRef; + backReferenceGBracketName => addNamedBackRef; + backReferenceKBracketName => addNamedBackRef; + backReferenceKBracketName2 => addNamedBackRef; + backReferenceKBracketName3 => addNamedBackRef; + backReferenceP => addNamedBackRef; + # Oniguruma - either angle braces or single quotes for this one + ('\\g<' [^>]*? '>'|'\\g\'' [^\']*? '\'') => { + ostringstream str; + str << "Onigiruma subroutine call at index " << ts - ptr << + " not supported."; + throw ParseError(str.str()); + }; + # Fallthrough: a \g that hasn't been caught by one of the above + # is invalid syntax. Without this rule, we would accept /A\g/. + '\\g' => { + throw LocatedParseError("Invalid reference after \\g"); + }; + '\\o{' [0-7]+ '}' => { string oct(ts + 3, te - ts - 4); unsigned long val; try { @@ -1566,21 +1566,21 @@ unichar readUtf8CodePoint4c(const char *s) { } catch (const std::out_of_range &) { val = MAX_UNICODE + 1; } - if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { - throw LocatedParseError("Value in \\o{...} sequence is too large"); - } - addEscapedOctal(currentSeq, (unichar)val, mode); - }; - # And for when it goes wrong - '\\o' => { - throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); - }; - # Hex - escapedHex => { - addEscapedHex(currentSeq, accumulator, mode); - }; - # Unicode Hex - '\\x{' xdigit+ '}' => { + if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { + throw LocatedParseError("Value in \\o{...} sequence is too large"); + } + addEscapedOctal(currentSeq, (unichar)val, mode); + }; + # And for when it goes wrong + '\\o' => { + throw LocatedParseError("Value in \\o{...} sequence is non-octal or missing braces"); + }; + # Hex + escapedHex => { + addEscapedHex(currentSeq, accumulator, mode); + }; + # Unicode Hex + '\\x{' xdigit+ '}' => { string hex(ts + 3, te - ts - 4); unsigned long val; try { @@ -1588,330 +1588,330 @@ unichar readUtf8CodePoint4c(const char *s) { } catch (const std::out_of_range &) { val = MAX_UNICODE + 1; } - if (val > MAX_UNICODE) { - throw LocatedParseError("Value in \\x{...} sequence is too large"); - } - addEscapedHex(currentSeq, (unichar)val, mode); - }; - # And for when it goes wrong - '\\x{' => { - throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); - }; - # Control characters - escapedCtrl => { - if (te - ts < 3) { - assert(te - ts == 2); - throw LocatedParseError(SLASH_C_ERROR); - } else { - assert(te - ts == 3); - addLiteral(currentSeq, decodeCtrl(ts[2]), mode); - } - }; - # A bunch of unsupported (for now) escapes - escapedUnsupported => { - ostringstream str; + if (val > MAX_UNICODE) { + throw LocatedParseError("Value in \\x{...} sequence is too large"); + } + addEscapedHex(currentSeq, (unichar)val, mode); + }; + # And for when it goes wrong + '\\x{' => { + throw LocatedParseError("Value in \\x{...} sequence is non-hex or missing }"); + }; + # Control characters + escapedCtrl => { + if (te - ts < 3) { + assert(te - ts == 2); + throw LocatedParseError(SLASH_C_ERROR); + } else { + assert(te - ts == 3); + addLiteral(currentSeq, decodeCtrl(ts[2]), mode); + } + }; + # A bunch of unsupported (for now) escapes + escapedUnsupported => { + ostringstream str; str << "'\\" << *(ts + 1) << "' at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - # Word character - '\\w' => { - auto cc = generateComponent(CLASS_WORD, false, mode); - currentSeq->addComponent(move(cc)); - }; - # Non word character - '\\W' => { - auto cc = generateComponent(CLASS_WORD, true, mode); - currentSeq->addComponent(move(cc)); - }; - # Whitespace character - '\\s' => { - auto cc = generateComponent(CLASS_SPACE, false, mode); - currentSeq->addComponent(move(cc)); - }; - # Non whitespace character - '\\S' => { - auto cc = generateComponent(CLASS_SPACE, true, mode); - currentSeq->addComponent(move(cc)); - }; - # Digit character - '\\d' => { - auto cc = generateComponent(CLASS_DIGIT, false, mode); - currentSeq->addComponent(move(cc)); - }; - # Non digit character - '\\D' => { - auto cc = generateComponent(CLASS_DIGIT, true, mode); - currentSeq->addComponent(move(cc)); - }; - # Horizontal whitespace - '\\h' => { - auto cc = generateComponent(CLASS_HORZ, false, mode); - currentSeq->addComponent(move(cc)); - }; - # Not horizontal whitespace - '\\H' => { - auto cc = generateComponent(CLASS_HORZ, true, mode); - currentSeq->addComponent(move(cc)); - }; - # Vertical whitespace - '\\v' => { - auto cc = generateComponent(CLASS_VERT, false, mode); - currentSeq->addComponent(move(cc)); - }; - # Not vertical whitespace - '\\V' => { - auto cc = generateComponent(CLASS_VERT, true, mode); - currentSeq->addComponent(move(cc)); - }; - - '\\p{' => { - assert(!currentCls && !inCharClass); - currentCls = getComponentClass(mode); - negated = false; - fhold; - fcall readBracedUCP; - }; - - '\\p' any => { - assert(!currentCls && !inCharClass); - currentCls = getComponentClass(mode); - negated = false; - fhold; - fcall readUCPSingle; - }; - - '\\P{' => { - assert(!currentCls && !inCharClass); - currentCls = getComponentClass(mode); - negated = true; - fhold; - fcall readBracedUCP; - }; - - '\\P' any => { - assert(!currentCls && !inCharClass); - currentCls = getComponentClass(mode); - negated = true; - fhold; - fcall readUCPSingle; - }; - - '\\P' => { throw LocatedParseError("Malformed property"); }; - '\\p' => { throw LocatedParseError("Malformed property"); }; - - # Newline sequence, hairy semantics that we don't do - '\\R' => { - ostringstream str; - str << "\\R at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - # Reset start of match, also hairy semantics that we don't do - '\\K' => { - ostringstream str; - str << "\\K at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - # \k without a backref is bugged in PCRE so we have no - # idea what our semantics should be on it - '\\k' => { - ostringstream str; - str << "\\k at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - # \G is more hairy pcre-api stuff, DO NOT WANT - '\\G' => { - ostringstream str; - str << "\\G at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - '\\X' => { - currentSeq->addComponent(ue2::make_unique<ComponentEUS>(ts - ptr, mode)); - }; - - # Fall through general escaped character - '\\' any => { - addLiteral(currentSeq, *(ts + 1), mode); - }; - - # A backslash with no follower is not allowed - '\\' => { - assert(ts + 1 == pe); - ostringstream str; - str << "Unescaped \\ at end of input, index " << ts - ptr << "."; - throw ParseError(str.str()); - }; - - ############################################################# - # Extended patterns - ############################################################# - - # Comment - '\(\?\#' => enterComment; - # Match modifiers - '\(\?' matchModifiers >resetModifiers ')' => applyModifiers; - # Non-capturing group, with flag modifiers - '\(\?' matchModifiers >resetModifiers ':' => enterModifiedGroup; - # Zero width look ahead assertion - '\(\?=' => enterZWLookAhead; - # Zero width negative look ahead assertion - '\(\?\!' => enterZWNegLookAhead; - # Zero width look behind assertion - '\(\?\<=' => enterZWLookBehind; - # Zero width negative look behind assertion - '\(\?\<\!' => enterZWNegLookBehind; - # Code (TOTALLY unsupported... for good reason) - '\(\?\{' => enterEmbeddedCode; - '\(\?\?\{' => enterEmbeddedCode; - # Atomic group - '\(\?\>' => enterAtomicGroup; - - # Named capturing groups - ( namedGroup1 | - namedGroup2 | - namedGroup3 ) => enterNamedGroup; - - # named/numbered subroutine references - numberedSubExpression => enterReferenceUnsupported; - namedSubExpression => enterReferenceUnsupported; - - # Conditional reference with a positive lookahead assertion - '(?(?=' => { - auto a = ue2::make_unique<ComponentAssertion>( - ComponentAssertion::LOOKAHEAD, ComponentAssertion::POS); - ComponentAssertion *a_seq = a.get(); - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(move(a))); - PUSH_SEQUENCE; - currentSeq = a_seq; - }; - # Conditional reference with a negative lookahead assertion - '(?(?!' => { - auto a = ue2::make_unique<ComponentAssertion>( - ComponentAssertion::LOOKAHEAD, ComponentAssertion::NEG); - ComponentAssertion *a_seq = a.get(); - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(move(a))); - PUSH_SEQUENCE; - currentSeq = a_seq; - }; - # Conditional reference with a positive lookbehind assertion - '(?(?<=' => { - auto a = ue2::make_unique<ComponentAssertion>( - ComponentAssertion::LOOKBEHIND, ComponentAssertion::POS); - ComponentAssertion *a_seq = a.get(); - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(move(a))); - PUSH_SEQUENCE; - currentSeq = a_seq; - }; - # Conditional reference with a negative lookbehind assertion - '(?(?<!' => { - auto a = ue2::make_unique<ComponentAssertion>( - ComponentAssertion::LOOKBEHIND, ComponentAssertion::NEG); - ComponentAssertion *a_seq = a.get(); - PUSH_SEQUENCE; - currentSeq = enterSequence(currentSeq, - ue2::make_unique<ComponentCondReference>(move(a))); - PUSH_SEQUENCE; - currentSeq = a_seq; - }; - - # Recursive conditional references (unsupported) - '(?(R' ( [0-9]+ | ('&' [A-Za-z0-9_]+) ) ? ')' => { - throw LocatedParseError("Pattern recursion not supported"); - }; - - # Conditional references - # numbered - '\(\?\(' (backRefIdSingle | backRefId) ')' => enterNumberedConditionalRef; - # named - ( namedConditionalRef1 | - namedConditionalRef2 | - namedConditionalRef3 ) => enterNamedConditionalRef; - - # Conditions (unsupported) - '\(\?\(' => enterConditionUnsupported; - - # Callouts (unsupported) - '\(\?C' [0-9]* '\)' => { - ostringstream str; - str << "Callout at index " << ts - ptr << " not supported."; - throw ParseError(str.str()); - }; - - # Any other char after '(?' is a pattern modifier we don't - # recognise. - '\(\?' any => { - throw LocatedParseError("Unrecognised character after (?"); - }; - - #unicode chars - utf8_2c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint2c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - utf8_3c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint3c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - utf8_4c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint4c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - hi_byte when is_utf8 => { - assert(mode.utf8); - throwInvalidUtf8(); - }; - - ############################################################# - # Literal character - ############################################################# - # literal character - whitespace => { - if (mode.ignore_space == false) { - addLiteral(currentSeq, *ts, mode); - } - }; - any => { - addLiteral(currentSeq, *ts, mode); - }; - *|; - - prepush { - DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); - if ((int)stack.size() == top) { - stack.resize(2 * (top + 1)); - } - } -}%% - -%% write data nofinal; - -/** \brief Main parser call, returns root Component or nullptr. */ + throw ParseError(str.str()); + }; + + # Word character + '\\w' => { + auto cc = generateComponent(CLASS_WORD, false, mode); + currentSeq->addComponent(move(cc)); + }; + # Non word character + '\\W' => { + auto cc = generateComponent(CLASS_WORD, true, mode); + currentSeq->addComponent(move(cc)); + }; + # Whitespace character + '\\s' => { + auto cc = generateComponent(CLASS_SPACE, false, mode); + currentSeq->addComponent(move(cc)); + }; + # Non whitespace character + '\\S' => { + auto cc = generateComponent(CLASS_SPACE, true, mode); + currentSeq->addComponent(move(cc)); + }; + # Digit character + '\\d' => { + auto cc = generateComponent(CLASS_DIGIT, false, mode); + currentSeq->addComponent(move(cc)); + }; + # Non digit character + '\\D' => { + auto cc = generateComponent(CLASS_DIGIT, true, mode); + currentSeq->addComponent(move(cc)); + }; + # Horizontal whitespace + '\\h' => { + auto cc = generateComponent(CLASS_HORZ, false, mode); + currentSeq->addComponent(move(cc)); + }; + # Not horizontal whitespace + '\\H' => { + auto cc = generateComponent(CLASS_HORZ, true, mode); + currentSeq->addComponent(move(cc)); + }; + # Vertical whitespace + '\\v' => { + auto cc = generateComponent(CLASS_VERT, false, mode); + currentSeq->addComponent(move(cc)); + }; + # Not vertical whitespace + '\\V' => { + auto cc = generateComponent(CLASS_VERT, true, mode); + currentSeq->addComponent(move(cc)); + }; + + '\\p{' => { + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = false; + fhold; + fcall readBracedUCP; + }; + + '\\p' any => { + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = false; + fhold; + fcall readUCPSingle; + }; + + '\\P{' => { + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = true; + fhold; + fcall readBracedUCP; + }; + + '\\P' any => { + assert(!currentCls && !inCharClass); + currentCls = getComponentClass(mode); + negated = true; + fhold; + fcall readUCPSingle; + }; + + '\\P' => { throw LocatedParseError("Malformed property"); }; + '\\p' => { throw LocatedParseError("Malformed property"); }; + + # Newline sequence, hairy semantics that we don't do + '\\R' => { + ostringstream str; + str << "\\R at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }; + + # Reset start of match, also hairy semantics that we don't do + '\\K' => { + ostringstream str; + str << "\\K at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }; + + # \k without a backref is bugged in PCRE so we have no + # idea what our semantics should be on it + '\\k' => { + ostringstream str; + str << "\\k at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }; + + # \G is more hairy pcre-api stuff, DO NOT WANT + '\\G' => { + ostringstream str; + str << "\\G at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }; + + '\\X' => { + currentSeq->addComponent(ue2::make_unique<ComponentEUS>(ts - ptr, mode)); + }; + + # Fall through general escaped character + '\\' any => { + addLiteral(currentSeq, *(ts + 1), mode); + }; + + # A backslash with no follower is not allowed + '\\' => { + assert(ts + 1 == pe); + ostringstream str; + str << "Unescaped \\ at end of input, index " << ts - ptr << "."; + throw ParseError(str.str()); + }; + + ############################################################# + # Extended patterns + ############################################################# + + # Comment + '\(\?\#' => enterComment; + # Match modifiers + '\(\?' matchModifiers >resetModifiers ')' => applyModifiers; + # Non-capturing group, with flag modifiers + '\(\?' matchModifiers >resetModifiers ':' => enterModifiedGroup; + # Zero width look ahead assertion + '\(\?=' => enterZWLookAhead; + # Zero width negative look ahead assertion + '\(\?\!' => enterZWNegLookAhead; + # Zero width look behind assertion + '\(\?\<=' => enterZWLookBehind; + # Zero width negative look behind assertion + '\(\?\<\!' => enterZWNegLookBehind; + # Code (TOTALLY unsupported... for good reason) + '\(\?\{' => enterEmbeddedCode; + '\(\?\?\{' => enterEmbeddedCode; + # Atomic group + '\(\?\>' => enterAtomicGroup; + + # Named capturing groups + ( namedGroup1 | + namedGroup2 | + namedGroup3 ) => enterNamedGroup; + + # named/numbered subroutine references + numberedSubExpression => enterReferenceUnsupported; + namedSubExpression => enterReferenceUnsupported; + + # Conditional reference with a positive lookahead assertion + '(?(?=' => { + auto a = ue2::make_unique<ComponentAssertion>( + ComponentAssertion::LOOKAHEAD, ComponentAssertion::POS); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }; + # Conditional reference with a negative lookahead assertion + '(?(?!' => { + auto a = ue2::make_unique<ComponentAssertion>( + ComponentAssertion::LOOKAHEAD, ComponentAssertion::NEG); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }; + # Conditional reference with a positive lookbehind assertion + '(?(?<=' => { + auto a = ue2::make_unique<ComponentAssertion>( + ComponentAssertion::LOOKBEHIND, ComponentAssertion::POS); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }; + # Conditional reference with a negative lookbehind assertion + '(?(?<!' => { + auto a = ue2::make_unique<ComponentAssertion>( + ComponentAssertion::LOOKBEHIND, ComponentAssertion::NEG); + ComponentAssertion *a_seq = a.get(); + PUSH_SEQUENCE; + currentSeq = enterSequence(currentSeq, + ue2::make_unique<ComponentCondReference>(move(a))); + PUSH_SEQUENCE; + currentSeq = a_seq; + }; + + # Recursive conditional references (unsupported) + '(?(R' ( [0-9]+ | ('&' [A-Za-z0-9_]+) ) ? ')' => { + throw LocatedParseError("Pattern recursion not supported"); + }; + + # Conditional references + # numbered + '\(\?\(' (backRefIdSingle | backRefId) ')' => enterNumberedConditionalRef; + # named + ( namedConditionalRef1 | + namedConditionalRef2 | + namedConditionalRef3 ) => enterNamedConditionalRef; + + # Conditions (unsupported) + '\(\?\(' => enterConditionUnsupported; + + # Callouts (unsupported) + '\(\?C' [0-9]* '\)' => { + ostringstream str; + str << "Callout at index " << ts - ptr << " not supported."; + throw ParseError(str.str()); + }; + + # Any other char after '(?' is a pattern modifier we don't + # recognise. + '\(\?' any => { + throw LocatedParseError("Unrecognised character after (?"); + }; + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint2c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint3c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint4c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + + ############################################################# + # Literal character + ############################################################# + # literal character + whitespace => { + if (mode.ignore_space == false) { + addLiteral(currentSeq, *ts, mode); + } + }; + any => { + addLiteral(currentSeq, *ts, mode); + }; + *|; + + prepush { + DEBUG_PRINTF("stack %zu top %d\n", stack.size(), top); + if ((int)stack.size() == top) { + stack.resize(2 * (top + 1)); + } + } +}%% + +%% write data nofinal; + +/** \brief Main parser call, returns root Component or nullptr. */ unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { assert(ptr); @@ -1923,116 +1923,116 @@ unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { p = read_control_verbs(p, pe, 0, globalMode); const char *eof = pe; - int cs; - UNUSED int act; - int top; - vector<int> stack; + int cs; + UNUSED int act; + int top; + vector<int> stack; const char *ts, *te; - unichar accumulator = 0; - unichar octAccumulator = 0; /* required as we are also accumulating for - * back ref when looking for octals */ - unsigned repeatN = 0; - unsigned repeatM = 0; - string label; - - ParseMode mode = globalMode; - ParseMode newMode; - - bool negated = false; - bool inComment = false; - - // Stack of sequences and flags used to store state when we enter - // sub-sequences. - vector<ExprState> sequences; - - // Index of the next capturing group. Note that zero is reserved for the - // root sequence. - unsigned groupIndex = 1; - - // Set storing group names that are currently in use. + unichar accumulator = 0; + unichar octAccumulator = 0; /* required as we are also accumulating for + * back ref when looking for octals */ + unsigned repeatN = 0; + unsigned repeatM = 0; + string label; + + ParseMode mode = globalMode; + ParseMode newMode; + + bool negated = false; + bool inComment = false; + + // Stack of sequences and flags used to store state when we enter + // sub-sequences. + vector<ExprState> sequences; + + // Index of the next capturing group. Note that zero is reserved for the + // root sequence. + unsigned groupIndex = 1; + + // Set storing group names that are currently in use. flat_set<string> groupNames; - - // Root sequence. - unique_ptr<ComponentSequence> rootSeq = ue2::make_unique<ComponentSequence>(); - rootSeq->setCaptureIndex(0); - - // Current sequence being appended to - ComponentSequence *currentSeq = rootSeq.get(); - - // The current character class being appended to. This is used as the - // accumulator for both character class and UCP properties. - unique_ptr<ComponentClass> currentCls; - - // True if the machine is currently inside a character class, i.e. square - // brackets [..]. - bool inCharClass = false; - - // True if the machine is inside a character class but it has not processed - // any "real" elements yet, i.e. it's still processing meta-characters like - // '^'. - bool inCharClassEarly = false; - - // Location at which the current character class began. + + // Root sequence. + unique_ptr<ComponentSequence> rootSeq = ue2::make_unique<ComponentSequence>(); + rootSeq->setCaptureIndex(0); + + // Current sequence being appended to + ComponentSequence *currentSeq = rootSeq.get(); + + // The current character class being appended to. This is used as the + // accumulator for both character class and UCP properties. + unique_ptr<ComponentClass> currentCls; + + // True if the machine is currently inside a character class, i.e. square + // brackets [..]. + bool inCharClass = false; + + // True if the machine is inside a character class but it has not processed + // any "real" elements yet, i.e. it's still processing meta-characters like + // '^'. + bool inCharClassEarly = false; + + // Location at which the current character class began. const char *currentClsBegin = p; - - // We throw exceptions on various parsing failures beyond this point: we - // use a try/catch block here to clean up our allocated memory before we - // re-throw the exception to the caller. - try { - // Embed the Ragel machine here - %% write init; - %% write exec; - - if (p != pe && *p != '\0') { - // didn't make it to the end of our input, but we didn't throw a ParseError? - assert(0); - ostringstream str; - str << "Parse error at index " << (p - ptr) << "."; - throw ParseError(str.str()); - } - - if (currentCls) { - assert(inCharClass); - assert(currentClsBegin); - ostringstream oss; - oss << "Unterminated character class starting at index " - << currentClsBegin - ptr << "."; - throw ParseError(oss.str()); - } - - if (inComment) { - throw ParseError("Unterminated comment."); - } - - if (!sequences.empty()) { - ostringstream str; - str << "Missing close parenthesis for group started at index " - << sequences.back().seqOffset << "."; - throw ParseError(str.str()); - } - - // Unlikely, but possible - if (groupIndex > 65535) { - throw ParseError("The maximum number of capturing subexpressions is 65535."); - } - - // Finalize the top-level sequence, which will take care of any - // top-level alternation. - currentSeq->finalize(); - assert(currentSeq == rootSeq.get()); - - // Ensure that all references are valid. - checkReferences(*rootSeq, groupIndex, groupNames); - - return move(rootSeq); - } catch (LocatedParseError &error) { - if (ts >= ptr && ts <= pe) { - error.locate(ts - ptr); - } else { - error.locate(0); - } - throw; - } -} - -} // namespace ue2 + + // We throw exceptions on various parsing failures beyond this point: we + // use a try/catch block here to clean up our allocated memory before we + // re-throw the exception to the caller. + try { + // Embed the Ragel machine here + %% write init; + %% write exec; + + if (p != pe && *p != '\0') { + // didn't make it to the end of our input, but we didn't throw a ParseError? + assert(0); + ostringstream str; + str << "Parse error at index " << (p - ptr) << "."; + throw ParseError(str.str()); + } + + if (currentCls) { + assert(inCharClass); + assert(currentClsBegin); + ostringstream oss; + oss << "Unterminated character class starting at index " + << currentClsBegin - ptr << "."; + throw ParseError(oss.str()); + } + + if (inComment) { + throw ParseError("Unterminated comment."); + } + + if (!sequences.empty()) { + ostringstream str; + str << "Missing close parenthesis for group started at index " + << sequences.back().seqOffset << "."; + throw ParseError(str.str()); + } + + // Unlikely, but possible + if (groupIndex > 65535) { + throw ParseError("The maximum number of capturing subexpressions is 65535."); + } + + // Finalize the top-level sequence, which will take care of any + // top-level alternation. + currentSeq->finalize(); + assert(currentSeq == rootSeq.get()); + + // Ensure that all references are valid. + checkReferences(*rootSeq, groupIndex, groupNames); + + return move(rootSeq); + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr); + } else { + error.locate(0); + } + throw; + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp index c5149fd702..cdfc974acd 100644 --- a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp +++ b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.cpp @@ -1,1173 +1,1173 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character class in UTF-8 mode. - */ - - -#include "Utf8ComponentClass.h" - -#include "buildstate.h" -#include "Parser.h" -#include "parse_error.h" -#include "position.h" -#include "position_info.h" -#include "nfagraph/ng_builder.h" -#include "util/compare.h" -#include "util/unicode_def.h" - -#include <cstring> - -#include "ucp_table.h" - -using namespace std; - -namespace ue2 { - -PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) { - /* Note: the mapping used here for mapping posix character classes - * matches the observed behaviour of PCRE (lower and upper going to \p{L} - * is not documented by pcre). - * - * Note: this mapping is quite different from both of the mappings - * recommended in the unicode regex tech report (TR-18) appendix C - */ - switch (in) { - case CLASS_ALNUM: - return CLASS_UCP_XAN; - case CLASS_ALPHA: - return CLASS_UCP_L; - case CLASS_BLANK: - return CLASS_HORZ; - case CLASS_DIGIT: - return CLASS_UCP_ND; - case CLASS_GRAPH: - return CLASS_XGRAPH; - case CLASS_LOWER: - if (mode.caseless) { /* we also pick up uppercase titlecase and others */ - return CLASS_UCP_L; - } else { - return CLASS_UCP_LL; - } - case CLASS_PRINT: - return CLASS_XPRINT; - case CLASS_PUNCT: - return CLASS_XPUNCT; - case CLASS_SPACE: - return CLASS_UCP_XPS; - case CLASS_UPPER: - if (mode.caseless) { /* we also pick up lowercase titlecase and others */ - return CLASS_UCP_L; - } else { - return CLASS_UCP_LU; - } - case CLASS_WORD: - return CLASS_UCP_XWD; - default: - return in; - } -} - -CodePointSet getPredefinedCodePointSet(PredefinedClass c, - const ParseMode &mode) { - /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */ - switch (c) { - case CLASS_ANY: - if (mode.dotall) { - return CodePointSet(CodePointSet::interval(0, MAX_UNICODE)); - } else { - CodePointSet rv; - rv.set('\n'); - rv.flip(); - return rv; - } - case CLASS_XGRAPH: { - CodePointSet rv; - rv = getUcpZ(); - rv |= getUcpC(); - rv.flip(); - // most of Cf, except for ... - CodePointSet cf = getUcpCf(); - cf.unset(0x061c); - cf.unset(0x180e); - cf.unsetRange(0x2066, 0x2069); - rv |= cf; - return rv; - } - case CLASS_XPRINT: { - // Same as graph, plus everything with the Zs property. - CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode); - rv |= getUcpZs(); - rv.set(0x180e); // Also included in this class by PCRE 8.38. - return rv; - } - case CLASS_XPUNCT: { - // Everything with the P (punctuation) property, plus code points in S - // (symbols) that are < 128. - CodePointSet rv = getUcpP(); - CodePointSet symbols = getUcpS(); - symbols.unsetRange(128, MAX_UNICODE); - rv |= symbols; - return rv; - } - case CLASS_HORZ: { - CodePointSet rv; - rv.set(0x0009); /* Horizontal tab */ - rv.set(0x0020); /* Space */ - rv.set(0x00A0); /* Non-break space */ - rv.set(0x1680); /* Ogham space mark */ - rv.set(0x180E); /* Mongolian vowel separator */ - rv.set(0x2000); /* En quad */ - rv.set(0x2001); /* Em quad */ - rv.set(0x2002); /* En space */ - rv.set(0x2003); /* Em space */ - rv.set(0x2004); /* Three-per-em space */ - rv.set(0x2005); /* Four-per-em space */ - rv.set(0x2006); /* Six-per-em space */ - rv.set(0x2007); /* Figure space */ - rv.set(0x2008); /* Punctuation space */ - rv.set(0x2009); /* Thin space */ - rv.set(0x200A); /* Hair space */ - rv.set(0x202F); /* Narrow no-break space */ - rv.set(0x205F); /* Medium mathematical space */ - rv.set(0x3000); /* Ideographic space */ - return rv; - } - case CLASS_VERT: { - CodePointSet rv; - rv.set(0x000A); /* Linefeed */ - rv.set(0x000B); /* Vertical tab */ - rv.set(0x000C); /* Formfeed */ - rv.set(0x000D); /* Carriage return */ - rv.set(0x0085); /* Next line */ - rv.set(0x2028); /* Line separator */ - rv.set(0x2029); /* Paragraph separator */ - return rv; - } - case CLASS_UCP_XPS: - case CLASS_UCP_XSP: { - CodePointSet rv; - rv.set(0x0009); /* Horizontal tab */ - rv.set(0x0020); /* Space */ - rv.set(0x00A0); /* Non-break space */ - rv.set(0x1680); /* Ogham space mark */ - rv.set(0x180E); /* Mongolian vowel separator */ - rv.set(0x2000); /* En quad */ - rv.set(0x2001); /* Em quad */ - rv.set(0x2002); /* En space */ - rv.set(0x2003); /* Em space */ - rv.set(0x2004); /* Three-per-em space */ - rv.set(0x2005); /* Four-per-em space */ - rv.set(0x2006); /* Six-per-em space */ - rv.set(0x2007); /* Figure space */ - rv.set(0x2008); /* Punctuation space */ - rv.set(0x2009); /* Thin space */ - rv.set(0x200A); /* Hair space */ - rv.set(0x202F); /* Narrow no-break space */ - rv.set(0x205F); /* Medium mathematical space */ - rv.set(0x3000); /* Ideographic space */ - rv.set(0x000A); /* Linefeed */ - rv.set(0x000B); /* Vertical tab */ - rv.set(0x000C); /* Formfeed */ - rv.set(0x000D); /* Carriage return */ - rv.set(0x0085); /* Next line */ - rv.set(0x2028); /* Line separator */ - rv.set(0x2029); /* Paragraph separator */ - return rv; - } - case CLASS_UCP_C: - return getUcpC(); - case CLASS_UCP_CC: - return getUcpCc(); - case CLASS_UCP_CF: - return getUcpCf(); - case CLASS_UCP_CN: - return getUcpCn(); - case CLASS_UCP_CO: - return getUcpCo(); - case CLASS_UCP_CS: - return getUcpCs(); - case CLASS_UCP_L: - return getUcpL(); - case CLASS_UCP_L_AND: - return getUcpL_and(); - case CLASS_UCP_LL: - return getUcpLl(); - case CLASS_UCP_LM: - return getUcpLm(); - case CLASS_UCP_LO: - return getUcpLo(); - case CLASS_UCP_LT: - return getUcpLt(); - case CLASS_UCP_LU: - return getUcpLu(); - case CLASS_UCP_M: - return getUcpM(); - case CLASS_UCP_MC: - return getUcpMc(); - case CLASS_UCP_ME: - return getUcpMe(); - case CLASS_UCP_MN: - return getUcpMn(); - case CLASS_UCP_N: - return getUcpN(); - case CLASS_UCP_ND: - return getUcpNd(); - case CLASS_UCP_NL: - return getUcpNl(); - case CLASS_UCP_NO: - return getUcpNo(); - case CLASS_UCP_P: - return getUcpP(); - case CLASS_UCP_PC: - return getUcpPc(); - case CLASS_UCP_PD: - return getUcpPd(); - case CLASS_UCP_PE: - return getUcpPe(); - case CLASS_UCP_PF: - return getUcpPf(); - case CLASS_UCP_PI: - return getUcpPi(); - case CLASS_UCP_PO: - return getUcpPo(); - case CLASS_UCP_PS: - return getUcpPs(); - case CLASS_UCP_S: - return getUcpS(); - case CLASS_UCP_SC: - return getUcpSc(); - case CLASS_UCP_SK: - return getUcpSk(); - case CLASS_UCP_SM: - return getUcpSm(); - case CLASS_UCP_SO: - return getUcpSo(); - case CLASS_UCP_XAN: - return getUcpXan(); - case CLASS_UCP_XWD: - return getUcpXwd(); - case CLASS_UCP_Z: - return getUcpZ(); - case CLASS_UCP_ZL: - return getUcpZl(); - case CLASS_UCP_ZP: - return getUcpZp(); - case CLASS_UCP_ZS: - return getUcpZs(); - case CLASS_SCRIPT_ARABIC: - return getUcpArabic(); - case CLASS_SCRIPT_ARMENIAN: - return getUcpArmenian(); - case CLASS_SCRIPT_AVESTAN: - return getUcpAvestan(); - case CLASS_SCRIPT_BALINESE: - return getUcpBalinese(); - case CLASS_SCRIPT_BAMUM: - return getUcpBamum(); - case CLASS_SCRIPT_BATAK: - return getUcpBatak(); - case CLASS_SCRIPT_BENGALI: - return getUcpBengali(); - case CLASS_SCRIPT_BOPOMOFO: - return getUcpBopomofo(); - case CLASS_SCRIPT_BRAHMI: - return getUcpBrahmi(); - case CLASS_SCRIPT_BRAILLE: - return getUcpBraille(); - case CLASS_SCRIPT_BUGINESE: - return getUcpBuginese(); - case CLASS_SCRIPT_BUHID: - return getUcpBuhid(); - case CLASS_SCRIPT_CANADIAN_ABORIGINAL: - return getUcpCanadian_Aboriginal(); - case CLASS_SCRIPT_CARIAN: - return getUcpCarian(); - case CLASS_SCRIPT_CHAM: - return getUcpCham(); - case CLASS_SCRIPT_CHEROKEE: - return getUcpCherokee(); - case CLASS_SCRIPT_COMMON: - return getUcpCommon(); - case CLASS_SCRIPT_COPTIC: - return getUcpCoptic(); - case CLASS_SCRIPT_CUNEIFORM: - return getUcpCuneiform(); - case CLASS_SCRIPT_CYPRIOT: - return getUcpCypriot(); - case CLASS_SCRIPT_CYRILLIC: - return getUcpCyrillic(); - case CLASS_SCRIPT_DESERET: - return getUcpDeseret(); - case CLASS_SCRIPT_DEVANAGARI: - return getUcpDevanagari(); - case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS: - return getUcpEgyptian_Hieroglyphs(); - case CLASS_SCRIPT_ETHIOPIC: - return getUcpEthiopic(); - case CLASS_SCRIPT_GEORGIAN: - return getUcpGeorgian(); - case CLASS_SCRIPT_GLAGOLITIC: - return getUcpGlagolitic(); - case CLASS_SCRIPT_GOTHIC: - return getUcpGothic(); - case CLASS_SCRIPT_GREEK: - return getUcpGreek(); - case CLASS_SCRIPT_GUJARATI: - return getUcpGujarati(); - case CLASS_SCRIPT_GURMUKHI: - return getUcpGurmukhi(); - case CLASS_SCRIPT_HAN: - return getUcpHan(); - case CLASS_SCRIPT_HANGUL: - return getUcpHangul(); - case CLASS_SCRIPT_HANUNOO: - return getUcpHanunoo(); - case CLASS_SCRIPT_HEBREW: - return getUcpHebrew(); - case CLASS_SCRIPT_HIRAGANA: - return getUcpHiragana(); - case CLASS_SCRIPT_IMPERIAL_ARAMAIC: - return getUcpImperial_Aramaic(); - case CLASS_SCRIPT_INHERITED: - return getUcpInherited(); - case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI: - return getUcpInscriptional_Pahlavi(); - case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN: - return getUcpInscriptional_Parthian(); - case CLASS_SCRIPT_JAVANESE: - return getUcpJavanese(); - case CLASS_SCRIPT_KAITHI: - return getUcpKaithi(); - case CLASS_SCRIPT_KANNADA: - return getUcpKannada(); - case CLASS_SCRIPT_KATAKANA: - return getUcpKatakana(); - case CLASS_SCRIPT_KAYAH_LI: - return getUcpKayah_Li(); - case CLASS_SCRIPT_KHAROSHTHI: - return getUcpKharoshthi(); - case CLASS_SCRIPT_KHMER: - return getUcpKhmer(); - case CLASS_SCRIPT_LAO: - return getUcpLao(); - case CLASS_SCRIPT_LATIN: - return getUcpLatin(); - case CLASS_SCRIPT_LEPCHA: - return getUcpLepcha(); - case CLASS_SCRIPT_LIMBU: - return getUcpLimbu(); - case CLASS_SCRIPT_LINEAR_B: - return getUcpLinear_B(); - case CLASS_SCRIPT_LISU: - return getUcpLisu(); - case CLASS_SCRIPT_LYCIAN: - return getUcpLycian(); - case CLASS_SCRIPT_LYDIAN: - return getUcpLydian(); - case CLASS_SCRIPT_MALAYALAM: - return getUcpMalayalam(); - case CLASS_SCRIPT_MANDAIC: - return getUcpMandaic(); - case CLASS_SCRIPT_MEETEI_MAYEK: - return getUcpMeetei_Mayek(); - case CLASS_SCRIPT_MONGOLIAN: - return getUcpMongolian(); - case CLASS_SCRIPT_MYANMAR: - return getUcpMyanmar(); - case CLASS_SCRIPT_NEW_TAI_LUE: - return getUcpNew_Tai_Lue(); - case CLASS_SCRIPT_NKO: - return getUcpNko(); - case CLASS_SCRIPT_OGHAM: - return getUcpOgham(); - case CLASS_SCRIPT_OL_CHIKI: - return getUcpOl_Chiki(); - case CLASS_SCRIPT_OLD_ITALIC: - return getUcpOld_Italic(); - case CLASS_SCRIPT_OLD_PERSIAN: - return getUcpOld_Persian(); - case CLASS_SCRIPT_OLD_SOUTH_ARABIAN: - return getUcpOld_South_Arabian(); - case CLASS_SCRIPT_OLD_TURKIC: - return getUcpOld_Turkic(); - case CLASS_SCRIPT_ORIYA: - return getUcpOriya(); - case CLASS_SCRIPT_OSMANYA: - return getUcpOsmanya(); - case CLASS_SCRIPT_PHAGS_PA: - return getUcpPhags_Pa(); - case CLASS_SCRIPT_PHOENICIAN: - return getUcpPhoenician(); - case CLASS_SCRIPT_REJANG: - return getUcpRejang(); - case CLASS_SCRIPT_RUNIC: - return getUcpRunic(); - case CLASS_SCRIPT_SAMARITAN: - return getUcpSamaritan(); - case CLASS_SCRIPT_SAURASHTRA: - return getUcpSaurashtra(); - case CLASS_SCRIPT_SHAVIAN: - return getUcpShavian(); - case CLASS_SCRIPT_SINHALA: - return getUcpSinhala(); - case CLASS_SCRIPT_SUNDANESE: - return getUcpSundanese(); - case CLASS_SCRIPT_SYLOTI_NAGRI: - return getUcpSyloti_Nagri(); - case CLASS_SCRIPT_SYRIAC: - return getUcpSyriac(); - case CLASS_SCRIPT_TAGALOG: - return getUcpTagalog(); - case CLASS_SCRIPT_TAGBANWA: - return getUcpTagbanwa(); - case CLASS_SCRIPT_TAI_LE: - return getUcpTai_Le(); - case CLASS_SCRIPT_TAI_THAM: - return getUcpTai_Tham(); - case CLASS_SCRIPT_TAI_VIET: - return getUcpTai_Viet(); - case CLASS_SCRIPT_TAMIL: - return getUcpTamil(); - case CLASS_SCRIPT_TELUGU: - return getUcpTelugu(); - case CLASS_SCRIPT_THAANA: - return getUcpThaana(); - case CLASS_SCRIPT_THAI: - return getUcpThai(); - case CLASS_SCRIPT_TIBETAN: - return getUcpTibetan(); - case CLASS_SCRIPT_TIFINAGH: - return getUcpTifinagh(); - case CLASS_SCRIPT_UGARITIC: - return getUcpUgaritic(); - case CLASS_SCRIPT_VAI: - return getUcpVai(); - case CLASS_SCRIPT_YI: - return getUcpYi(); - case CLASS_UCP_ANY: - return CodePointSet(CodePointSet::interval(0, MAX_UNICODE)); - - default: { /* currently uses ascii defns */ - CharReach cr = getPredefinedCharReach(c, mode); - CodePointSet rv; - for (u32 i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { - rv.set(i); - } - return rv; - } - } -} - -UTF8ComponentClass::UTF8ComponentClass(const ParseMode &mode_in) - : ComponentClass(mode_in), - single_pos( GlushkovBuildState::POS_UNINITIALIZED), - one_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), - two_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), - three_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), - two_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED), - three_char_dot_head(GlushkovBuildState::POS_UNINITIALIZED), - four_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED) { - assert(mode.utf8); -} - -UTF8ComponentClass *UTF8ComponentClass::clone() const { - return new UTF8ComponentClass(*this); -} - -bool UTF8ComponentClass::class_empty(void) const { - assert(finalized); - return cps.none(); -} - -void UTF8ComponentClass::createRange(unichar to) { - assert(range_start != INVALID_UNICODE); - unichar from = range_start; - if (from > to) { - throw LocatedParseError("Range out of order in character class"); - } - - in_cand_range = false; - CodePointSet ncps; - ncps.setRange(from, to); - if (mode.caseless) { - make_caseless(&ncps); - } - cps |= ncps; - range_start = INVALID_UNICODE; -} - -void UTF8ComponentClass::add(PredefinedClass c, bool negative) { - if (in_cand_range) { // can't form a range here - throw LocatedParseError("Invalid range in character class"); - } - - if (mode.ucp) { - c = translateForUcpMode(c, mode); - } - - // caselessness is handled inside this call - don't apply make_caseless - // to the result - CodePointSet pcps = getPredefinedCodePointSet(c, mode); - if (negative) { - pcps.flip(); - } - - cps |= pcps; - - range_start = INVALID_UNICODE; - in_cand_range = false; -} - -void UTF8ComponentClass::add(unichar c) { - DEBUG_PRINTF("adding \\x%08x\n", c); - if (c > MAX_UNICODE) { // too big! - throw LocatedParseError("Hexadecimal value is greater than \\x10FFFF"); - } - - if (in_cand_range) { - createRange(c); - return; - } - - CodePointSet ncps; - ncps.set(c); - if (mode.caseless) { - make_caseless(&ncps); - } - cps |= ncps; - range_start = c; -} - -void UTF8ComponentClass::finalize() { - if (finalized) { - return; - } - - // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the - // dash is a literal dash. - if (in_cand_range) { - cps.set('-'); - in_cand_range = false; - } - - if (m_negate) { - cps.flip(); - } - - finalized = true; -} - -Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) { - map<u8, Position>::const_iterator it = heads.find(first_byte); - if (it != heads.end()) { - return it->second; - } - - Position head = builder.makePositions(1); - assert(heads.find(first_byte) == heads.end()); - builder.addCharReach(head, CharReach(first_byte)); - /* no report id as head can not be directly wired to accept */ - - heads[first_byte] = head; - return head; -} - -void UTF8ComponentClass::ensureDotTrailer(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - if (one_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { - return; - } - - one_dot_trailer = builder.makePositions(1); - builder.setNodeReportID(one_dot_trailer, 0); - builder.addCharReach(one_dot_trailer, CharReach(0x80, 0xbf)); - tails.insert(one_dot_trailer); -} - -void UTF8ComponentClass::ensureTwoDotTrailer(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - if (two_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { - return; - } - - ensureDotTrailer(bs); - - two_dot_trailer = builder.makePositions(1); - builder.addCharReach(two_dot_trailer, CharReach(0x80, 0xbf)); - bs.addSuccessor(two_dot_trailer, one_dot_trailer); -} - -void UTF8ComponentClass::ensureThreeDotTrailer(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - if (three_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { - return; - } - - ensureTwoDotTrailer(bs); - - three_dot_trailer = builder.makePositions(1); - builder.addCharReach(three_dot_trailer, CharReach(0x80, 0xbf)); - bs.addSuccessor(three_dot_trailer, two_dot_trailer); -} - -void UTF8ComponentClass::buildOneByte(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - for (CodePointSet::const_iterator it = cps.begin(); it != cps.end(); ++it) { - unichar b = lower(*it); - unichar e = upper(*it) + 1; - if (b >= UTF_2CHAR_MIN) { - continue; - } - - DEBUG_PRINTF("building vertices for [%u, %u)\n", b, e); - - if (single_pos == GlushkovBuildState::POS_UNINITIALIZED) { - single_pos = builder.makePositions(1); - builder.setNodeReportID(single_pos, 0 /* offset adj */); - tails.insert(single_pos); - } - CharReach cr(b, MIN(e, UTF_2CHAR_MIN) - 1); - builder.addCharReach(single_pos, cr); - } -} - -void UTF8ComponentClass::addToTail(GlushkovBuildState &bs, - map<Position, Position> &finals, - Position prev, unichar b, unichar e) { - NFABuilder &builder = bs.getBuilder(); - Position tail; - if (finals.find(prev) == finals.end()) { - tail = builder.makePositions(1); - builder.setNodeReportID(tail, 0 /* offset adj */); - bs.addSuccessor(prev, tail); - finals[prev] = tail; - tails.insert(tail); - } else { - tail = finals[prev]; - } - - u8 bb = makeContByte(b); - u8 ee = makeContByte(e - 1); - builder.addCharReach(tail, CharReach(bb, ee)); -} - -void UTF8ComponentClass::buildTwoByte(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - map<Position, Position> finals; - - for (auto it = cps.begin(); it != cps.end(); ++it) { - unichar b = lower(*it); - unichar e = upper(*it) + 1; - - b = MAX(b, UTF_2CHAR_MIN); - e = MIN(e, UTF_3CHAR_MIN); - - if (b >= e) { - continue; /* we're done here */ - } - - /* raise b to the start of the next tail byte boundary */ - if (b & UTF_CONT_BYTE_VALUE_MASK) { - unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); - u8 first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT); - assert(first_byte > 0xc1 && first_byte <= 0xdf); - - Position head = getHead(builder, first_byte); - addToTail(bs, finals, head, b, bb); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the end of a tail byte boundary */ - if (e & UTF_CONT_BYTE_VALUE_MASK) { - unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; - assert(ee >= b); - - u8 first_byte = UTF_TWO_BYTE_HEADER | (ee >> UTF_CONT_SHIFT); - assert(first_byte > 0xc1 && first_byte <= 0xdf); - - Position head = getHead(builder, first_byte); - addToTail(bs, finals, head, ee, e); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* middle section just goes to a common full vertex */ - ensureDotTrailer(bs); - - if (two_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { - two_char_dot_head = builder.makePositions(1); - bs.addSuccessor(two_char_dot_head, one_dot_trailer); - } - - u8 min_first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT); - u8 max_first_byte = UTF_TWO_BYTE_HEADER | ((e - 1) >> UTF_CONT_SHIFT); - - assert(min_first_byte > 0xc1 && min_first_byte <= 0xdf); - assert(max_first_byte > 0xc1 && max_first_byte <= 0xdf); - - builder.addCharReach(two_char_dot_head, - CharReach(min_first_byte, max_first_byte)); - } -} - -static -Position getMid(GlushkovBuildState &bs, map<Position, map<u8, Position> > &mids, - const Position &prev, u8 byte_val) { - NFABuilder &builder = bs.getBuilder(); - map<u8, Position> &by_byte = mids[prev]; - - map<u8, Position>::const_iterator it = by_byte.find(byte_val); - if (it != by_byte.end()) { - return it->second; - } - - Position mid = builder.makePositions(1); - builder.addCharReach(mid, CharReach(byte_val)); - bs.addSuccessor(prev, mid); - /* no report id as mid can not be directly wired to accept */ - - by_byte[byte_val] = mid; - return mid; -} - -void UTF8ComponentClass::buildThreeByte(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - - map<Position, map<u8, Position> > mids; - map<Position, Position> finals; - - for (auto it = cps.begin(); it != cps.end(); ++it) { - unichar b = lower(*it); - unichar e = upper(*it) + 1; - - b = MAX(b, UTF_3CHAR_MIN); - e = MIN(e, UTF_4CHAR_MIN); - - if (b >= e) { - continue; /* we're done here */ - } - - /* raise b to the start of the next tail byte boundary */ - if (b & UTF_CONT_BYTE_VALUE_MASK) { - unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); - - u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT)); - assert(first_byte >= 0xe0 && first_byte <= 0xef); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(b >> UTF_CONT_SHIFT); - Position mid = getMid(bs, mids, head, second_byte); - - addToTail(bs, finals, mid, b, bb); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the end of a tail byte boundary */ - if (e & UTF_CONT_BYTE_VALUE_MASK) { - unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; - assert(ee >= b); - - u8 first_byte = UTF_THREE_BYTE_HEADER - | (ee >> (2 * UTF_CONT_SHIFT)); - assert(first_byte >= 0xe0 && first_byte <= 0xef); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(ee >> UTF_CONT_SHIFT); - Position mid = getMid(bs, mids, head, second_byte); - - addToTail(bs, finals, mid, ee, e); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* from here on in the last byte is always full */ - ensureDotTrailer(bs); - - /* raise b to the start of the next mid byte boundary */ - if (b & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) { - unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT))); - - u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT)); - Position head = getHead(builder, first_byte); - - Position mid = builder.makePositions(1); - bs.addSuccessor(head, mid); - bs.addSuccessor(mid, one_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 second_min = makeContByte(b >> UTF_CONT_SHIFT); - u8 second_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT); - - builder.addCharReach(mid, CharReach(second_min, second_max)); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the end of a mid byte boundary */ - if (e & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) { - unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1); - assert(ee >= b); - - u8 first_byte = UTF_THREE_BYTE_HEADER - | (ee >> (2 * UTF_CONT_SHIFT)); - Position head = getHead(builder, first_byte); - - Position mid = builder.makePositions(1); - bs.addSuccessor(head, mid); - bs.addSuccessor(mid, one_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 second_min = makeContByte(ee >> UTF_CONT_SHIFT); - u8 second_max = makeContByte((e - 1) >> UTF_CONT_SHIFT); - - builder.addCharReach(mid, CharReach(second_min, second_max)); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* now we just have to wire head to a common dot trailer */ - ensureTwoDotTrailer(bs); - if (three_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { - three_char_dot_head = builder.makePositions(1); - bs.addSuccessor(three_char_dot_head, two_dot_trailer); - } - - u8 min_first_byte = UTF_THREE_BYTE_HEADER - | (b >> (2 * UTF_CONT_SHIFT)); - u8 max_first_byte = UTF_THREE_BYTE_HEADER - | ((e - 1) >> (2 * UTF_CONT_SHIFT)); - - assert(min_first_byte > 0xdf && min_first_byte <= 0xef); - assert(max_first_byte > 0xdf && max_first_byte <= 0xef); - - builder.addCharReach(three_char_dot_head, - CharReach(min_first_byte, max_first_byte)); - } -} - -static -u8 makeFirstByteOfFour(unichar raw) { - u8 first_byte = UTF_FOUR_BYTE_HEADER | (raw >> (3 * UTF_CONT_SHIFT)); - assert(first_byte > 0xef && first_byte <= 0xf7); - return first_byte; -} - -static -bool isTwoContAligned(unichar raw) { - return !(raw & ((1 << (2 * UTF_CONT_SHIFT)) - 1)); -} - -static -bool isThreeContAligned(unichar raw) { - return !(raw & ((1 << (3 * UTF_CONT_SHIFT)) - 1)); -} - -void UTF8ComponentClass::buildFourByte(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - map<Position, map<u8, Position> > mids; - map<Position, Position> finals; - - for (auto it = cps.begin(); it != cps.end(); ++it) { - unichar b = lower(*it); - unichar e = upper(*it) + 1; - - b = MAX(b, UTF_4CHAR_MIN); - e = MIN(e, MAX_UNICODE + 1); - - if (b >= e) { - continue; - } - - /* raise b to the start of the next tail byte boundary */ - if (b & UTF_CONT_BYTE_VALUE_MASK) { - unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); - - u8 first_byte = makeFirstByteOfFour(b); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT)); - Position mid1 = getMid(bs, mids, head, second_byte); - - u8 third_byte = makeContByte(b >> UTF_CONT_SHIFT); - Position mid2 = getMid(bs, mids, mid1, third_byte); - - addToTail(bs, finals, mid2, b, bb); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the end of a tail byte boundary */ - if (e & UTF_CONT_BYTE_VALUE_MASK) { - unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; - assert(ee >= b); - - u8 first_byte = makeFirstByteOfFour(ee); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); - Position mid1 = getMid(bs, mids, head, second_byte); - - u8 third_byte = makeContByte(ee >> UTF_CONT_SHIFT); - Position mid2 = getMid(bs, mids, mid1, third_byte); - - addToTail(bs, finals, mid2, ee, e); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* from here on in the last byte is always full */ - ensureDotTrailer(bs); - - /* raise b to the start of the next mid byte boundary */ - if (!isTwoContAligned(b)) { - unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT))); - - u8 first_byte = makeFirstByteOfFour(b); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT)); - Position mid1 = getMid(bs, mids, head, second_byte); - - Position mid2 = builder.makePositions(1); - bs.addSuccessor(mid1, mid2); - bs.addSuccessor(mid2, one_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 byte_min = makeContByte(b >> UTF_CONT_SHIFT); - u8 byte_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT); - - builder.addCharReach(mid2, CharReach(byte_min, byte_max)); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the end of a mid byte boundary */ - if (!isTwoContAligned(e)) { - unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1); - assert(ee >= b); - - u8 first_byte = makeFirstByteOfFour(ee); - Position head = getHead(builder, first_byte); - - u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); - Position mid1 = getMid(bs, mids, head, second_byte); - - Position mid2 = builder.makePositions(1); - bs.addSuccessor(mid1, mid2); - bs.addSuccessor(mid2, one_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 byte_min = makeContByte(ee >> UTF_CONT_SHIFT); - u8 byte_max = makeContByte((e - 1) >> UTF_CONT_SHIFT); - - builder.addCharReach(mid2, CharReach(byte_min, byte_max)); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - ensureTwoDotTrailer(bs); - - /* raise b to the next byte boundary */ - if (!isThreeContAligned(b)) { - unichar bb = MIN(e, ROUNDUP_N(b, 1 << (3 * UTF_CONT_SHIFT))); - - u8 first_byte = makeFirstByteOfFour(b); - Position head = getHead(builder, first_byte); - - Position mid1 = builder.makePositions(1); - bs.addSuccessor(head, mid1); - bs.addSuccessor(mid1, two_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 byte_min = makeContByte(b >> (2 * UTF_CONT_SHIFT)); - u8 byte_max = makeContByte((bb - 1) >> (2 * UTF_CONT_SHIFT)); - - builder.addCharReach(mid1, CharReach(byte_min, byte_max)); - - b = bb; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* lower e to the next byte boundary */ - if (!isThreeContAligned(e)) { - unichar ee = e & ~((1 << (3 * UTF_CONT_SHIFT)) - 1); - assert(ee >= b); - - u8 first_byte = makeFirstByteOfFour(ee); - Position head = getHead(builder, first_byte); - Position mid1 = builder.makePositions(1); - bs.addSuccessor(head, mid1); - bs.addSuccessor(mid1, two_dot_trailer); - /* no report id as mid can not be directly wired to accept, - * not adding to mids as we are completely filling its downstream */ - u8 byte_min = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); - u8 byte_max = makeContByte((e - 1) >> (2 * UTF_CONT_SHIFT)); - - builder.addCharReach(mid1, CharReach(byte_min, byte_max)); - - e = ee; - } - - if (b == e) { - continue; /* we're done here */ - } - assert(b < e); - - /* now we just have to wire head to a common dot trailer */ - ensureThreeDotTrailer(bs); - if (four_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { - four_char_dot_head = builder.makePositions(1); - bs.addSuccessor(four_char_dot_head, three_dot_trailer); - } - - u8 min_first_byte = makeFirstByteOfFour(b); - u8 max_first_byte = makeFirstByteOfFour(e - 1); - - builder.addCharReach(four_char_dot_head, - CharReach(min_first_byte, max_first_byte)); - } -} - -void UTF8ComponentClass::notePositions(GlushkovBuildState &bs) { - // We should always be finalized by now. - assert(finalized); - - // An empty class is a special case; this would be generated by something - // like /[\s\S]/8, which can never match. We treat these like we do the non - // UTF-8 version: add a vertex with empty reach (to ensure we create a - // connected graph) and pick it up later on. - if (class_empty()) { - DEBUG_PRINTF("empty class!\n"); - assert(single_pos == GlushkovBuildState::POS_UNINITIALIZED); - NFABuilder &builder = bs.getBuilder(); - single_pos = builder.makePositions(1); - builder.setNodeReportID(single_pos, 0 /* offset adj */); - builder.addCharReach(single_pos, CharReach()); - tails.insert(single_pos); - return; - } - - buildOneByte(bs); - buildTwoByte(bs); - buildThreeByte(bs); - buildFourByte(bs); -} - -void UTF8ComponentClass::buildFollowSet(GlushkovBuildState &, - const vector<PositionInfo> &) { - /* states are wired in notePositions as all belong to this component. */ -} - -vector<PositionInfo> UTF8ComponentClass::first(void) const { - vector<PositionInfo> rv; - if (single_pos != GlushkovBuildState::POS_UNINITIALIZED) { - rv.push_back(single_pos); - } - if (two_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { - rv.push_back(two_char_dot_head); - } - if (three_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { - rv.push_back(three_char_dot_head); - } - if (four_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { - rv.push_back(four_char_dot_head); - } - - for (auto it = heads.begin(); it != heads.end(); ++it) { - rv.push_back(it->second); - } - return rv; -} - -vector<PositionInfo> UTF8ComponentClass::last(void) const { - vector<PositionInfo> rv; - - rv.insert(rv.end(), tails.begin(), tails.end()); - return rv; -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character class in UTF-8 mode. + */ + + +#include "Utf8ComponentClass.h" + +#include "buildstate.h" +#include "Parser.h" +#include "parse_error.h" +#include "position.h" +#include "position_info.h" +#include "nfagraph/ng_builder.h" +#include "util/compare.h" +#include "util/unicode_def.h" + +#include <cstring> + +#include "ucp_table.h" + +using namespace std; + +namespace ue2 { + +PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode) { + /* Note: the mapping used here for mapping posix character classes + * matches the observed behaviour of PCRE (lower and upper going to \p{L} + * is not documented by pcre). + * + * Note: this mapping is quite different from both of the mappings + * recommended in the unicode regex tech report (TR-18) appendix C + */ + switch (in) { + case CLASS_ALNUM: + return CLASS_UCP_XAN; + case CLASS_ALPHA: + return CLASS_UCP_L; + case CLASS_BLANK: + return CLASS_HORZ; + case CLASS_DIGIT: + return CLASS_UCP_ND; + case CLASS_GRAPH: + return CLASS_XGRAPH; + case CLASS_LOWER: + if (mode.caseless) { /* we also pick up uppercase titlecase and others */ + return CLASS_UCP_L; + } else { + return CLASS_UCP_LL; + } + case CLASS_PRINT: + return CLASS_XPRINT; + case CLASS_PUNCT: + return CLASS_XPUNCT; + case CLASS_SPACE: + return CLASS_UCP_XPS; + case CLASS_UPPER: + if (mode.caseless) { /* we also pick up lowercase titlecase and others */ + return CLASS_UCP_L; + } else { + return CLASS_UCP_LU; + } + case CLASS_WORD: + return CLASS_UCP_XWD; + default: + return in; + } +} + +CodePointSet getPredefinedCodePointSet(PredefinedClass c, + const ParseMode &mode) { + /* TODO: support properly PCRE_UCP mode and non PCRE_UCP mode */ + switch (c) { + case CLASS_ANY: + if (mode.dotall) { + return CodePointSet(CodePointSet::interval(0, MAX_UNICODE)); + } else { + CodePointSet rv; + rv.set('\n'); + rv.flip(); + return rv; + } + case CLASS_XGRAPH: { + CodePointSet rv; + rv = getUcpZ(); + rv |= getUcpC(); + rv.flip(); + // most of Cf, except for ... + CodePointSet cf = getUcpCf(); + cf.unset(0x061c); + cf.unset(0x180e); + cf.unsetRange(0x2066, 0x2069); + rv |= cf; + return rv; + } + case CLASS_XPRINT: { + // Same as graph, plus everything with the Zs property. + CodePointSet rv = getPredefinedCodePointSet(CLASS_XGRAPH, mode); + rv |= getUcpZs(); + rv.set(0x180e); // Also included in this class by PCRE 8.38. + return rv; + } + case CLASS_XPUNCT: { + // Everything with the P (punctuation) property, plus code points in S + // (symbols) that are < 128. + CodePointSet rv = getUcpP(); + CodePointSet symbols = getUcpS(); + symbols.unsetRange(128, MAX_UNICODE); + rv |= symbols; + return rv; + } + case CLASS_HORZ: { + CodePointSet rv; + rv.set(0x0009); /* Horizontal tab */ + rv.set(0x0020); /* Space */ + rv.set(0x00A0); /* Non-break space */ + rv.set(0x1680); /* Ogham space mark */ + rv.set(0x180E); /* Mongolian vowel separator */ + rv.set(0x2000); /* En quad */ + rv.set(0x2001); /* Em quad */ + rv.set(0x2002); /* En space */ + rv.set(0x2003); /* Em space */ + rv.set(0x2004); /* Three-per-em space */ + rv.set(0x2005); /* Four-per-em space */ + rv.set(0x2006); /* Six-per-em space */ + rv.set(0x2007); /* Figure space */ + rv.set(0x2008); /* Punctuation space */ + rv.set(0x2009); /* Thin space */ + rv.set(0x200A); /* Hair space */ + rv.set(0x202F); /* Narrow no-break space */ + rv.set(0x205F); /* Medium mathematical space */ + rv.set(0x3000); /* Ideographic space */ + return rv; + } + case CLASS_VERT: { + CodePointSet rv; + rv.set(0x000A); /* Linefeed */ + rv.set(0x000B); /* Vertical tab */ + rv.set(0x000C); /* Formfeed */ + rv.set(0x000D); /* Carriage return */ + rv.set(0x0085); /* Next line */ + rv.set(0x2028); /* Line separator */ + rv.set(0x2029); /* Paragraph separator */ + return rv; + } + case CLASS_UCP_XPS: + case CLASS_UCP_XSP: { + CodePointSet rv; + rv.set(0x0009); /* Horizontal tab */ + rv.set(0x0020); /* Space */ + rv.set(0x00A0); /* Non-break space */ + rv.set(0x1680); /* Ogham space mark */ + rv.set(0x180E); /* Mongolian vowel separator */ + rv.set(0x2000); /* En quad */ + rv.set(0x2001); /* Em quad */ + rv.set(0x2002); /* En space */ + rv.set(0x2003); /* Em space */ + rv.set(0x2004); /* Three-per-em space */ + rv.set(0x2005); /* Four-per-em space */ + rv.set(0x2006); /* Six-per-em space */ + rv.set(0x2007); /* Figure space */ + rv.set(0x2008); /* Punctuation space */ + rv.set(0x2009); /* Thin space */ + rv.set(0x200A); /* Hair space */ + rv.set(0x202F); /* Narrow no-break space */ + rv.set(0x205F); /* Medium mathematical space */ + rv.set(0x3000); /* Ideographic space */ + rv.set(0x000A); /* Linefeed */ + rv.set(0x000B); /* Vertical tab */ + rv.set(0x000C); /* Formfeed */ + rv.set(0x000D); /* Carriage return */ + rv.set(0x0085); /* Next line */ + rv.set(0x2028); /* Line separator */ + rv.set(0x2029); /* Paragraph separator */ + return rv; + } + case CLASS_UCP_C: + return getUcpC(); + case CLASS_UCP_CC: + return getUcpCc(); + case CLASS_UCP_CF: + return getUcpCf(); + case CLASS_UCP_CN: + return getUcpCn(); + case CLASS_UCP_CO: + return getUcpCo(); + case CLASS_UCP_CS: + return getUcpCs(); + case CLASS_UCP_L: + return getUcpL(); + case CLASS_UCP_L_AND: + return getUcpL_and(); + case CLASS_UCP_LL: + return getUcpLl(); + case CLASS_UCP_LM: + return getUcpLm(); + case CLASS_UCP_LO: + return getUcpLo(); + case CLASS_UCP_LT: + return getUcpLt(); + case CLASS_UCP_LU: + return getUcpLu(); + case CLASS_UCP_M: + return getUcpM(); + case CLASS_UCP_MC: + return getUcpMc(); + case CLASS_UCP_ME: + return getUcpMe(); + case CLASS_UCP_MN: + return getUcpMn(); + case CLASS_UCP_N: + return getUcpN(); + case CLASS_UCP_ND: + return getUcpNd(); + case CLASS_UCP_NL: + return getUcpNl(); + case CLASS_UCP_NO: + return getUcpNo(); + case CLASS_UCP_P: + return getUcpP(); + case CLASS_UCP_PC: + return getUcpPc(); + case CLASS_UCP_PD: + return getUcpPd(); + case CLASS_UCP_PE: + return getUcpPe(); + case CLASS_UCP_PF: + return getUcpPf(); + case CLASS_UCP_PI: + return getUcpPi(); + case CLASS_UCP_PO: + return getUcpPo(); + case CLASS_UCP_PS: + return getUcpPs(); + case CLASS_UCP_S: + return getUcpS(); + case CLASS_UCP_SC: + return getUcpSc(); + case CLASS_UCP_SK: + return getUcpSk(); + case CLASS_UCP_SM: + return getUcpSm(); + case CLASS_UCP_SO: + return getUcpSo(); + case CLASS_UCP_XAN: + return getUcpXan(); + case CLASS_UCP_XWD: + return getUcpXwd(); + case CLASS_UCP_Z: + return getUcpZ(); + case CLASS_UCP_ZL: + return getUcpZl(); + case CLASS_UCP_ZP: + return getUcpZp(); + case CLASS_UCP_ZS: + return getUcpZs(); + case CLASS_SCRIPT_ARABIC: + return getUcpArabic(); + case CLASS_SCRIPT_ARMENIAN: + return getUcpArmenian(); + case CLASS_SCRIPT_AVESTAN: + return getUcpAvestan(); + case CLASS_SCRIPT_BALINESE: + return getUcpBalinese(); + case CLASS_SCRIPT_BAMUM: + return getUcpBamum(); + case CLASS_SCRIPT_BATAK: + return getUcpBatak(); + case CLASS_SCRIPT_BENGALI: + return getUcpBengali(); + case CLASS_SCRIPT_BOPOMOFO: + return getUcpBopomofo(); + case CLASS_SCRIPT_BRAHMI: + return getUcpBrahmi(); + case CLASS_SCRIPT_BRAILLE: + return getUcpBraille(); + case CLASS_SCRIPT_BUGINESE: + return getUcpBuginese(); + case CLASS_SCRIPT_BUHID: + return getUcpBuhid(); + case CLASS_SCRIPT_CANADIAN_ABORIGINAL: + return getUcpCanadian_Aboriginal(); + case CLASS_SCRIPT_CARIAN: + return getUcpCarian(); + case CLASS_SCRIPT_CHAM: + return getUcpCham(); + case CLASS_SCRIPT_CHEROKEE: + return getUcpCherokee(); + case CLASS_SCRIPT_COMMON: + return getUcpCommon(); + case CLASS_SCRIPT_COPTIC: + return getUcpCoptic(); + case CLASS_SCRIPT_CUNEIFORM: + return getUcpCuneiform(); + case CLASS_SCRIPT_CYPRIOT: + return getUcpCypriot(); + case CLASS_SCRIPT_CYRILLIC: + return getUcpCyrillic(); + case CLASS_SCRIPT_DESERET: + return getUcpDeseret(); + case CLASS_SCRIPT_DEVANAGARI: + return getUcpDevanagari(); + case CLASS_SCRIPT_EGYPTIAN_HIEROGLYPHS: + return getUcpEgyptian_Hieroglyphs(); + case CLASS_SCRIPT_ETHIOPIC: + return getUcpEthiopic(); + case CLASS_SCRIPT_GEORGIAN: + return getUcpGeorgian(); + case CLASS_SCRIPT_GLAGOLITIC: + return getUcpGlagolitic(); + case CLASS_SCRIPT_GOTHIC: + return getUcpGothic(); + case CLASS_SCRIPT_GREEK: + return getUcpGreek(); + case CLASS_SCRIPT_GUJARATI: + return getUcpGujarati(); + case CLASS_SCRIPT_GURMUKHI: + return getUcpGurmukhi(); + case CLASS_SCRIPT_HAN: + return getUcpHan(); + case CLASS_SCRIPT_HANGUL: + return getUcpHangul(); + case CLASS_SCRIPT_HANUNOO: + return getUcpHanunoo(); + case CLASS_SCRIPT_HEBREW: + return getUcpHebrew(); + case CLASS_SCRIPT_HIRAGANA: + return getUcpHiragana(); + case CLASS_SCRIPT_IMPERIAL_ARAMAIC: + return getUcpImperial_Aramaic(); + case CLASS_SCRIPT_INHERITED: + return getUcpInherited(); + case CLASS_SCRIPT_INSCRIPTIONAL_PAHLAVI: + return getUcpInscriptional_Pahlavi(); + case CLASS_SCRIPT_INSCRIPTIONAL_PARTHIAN: + return getUcpInscriptional_Parthian(); + case CLASS_SCRIPT_JAVANESE: + return getUcpJavanese(); + case CLASS_SCRIPT_KAITHI: + return getUcpKaithi(); + case CLASS_SCRIPT_KANNADA: + return getUcpKannada(); + case CLASS_SCRIPT_KATAKANA: + return getUcpKatakana(); + case CLASS_SCRIPT_KAYAH_LI: + return getUcpKayah_Li(); + case CLASS_SCRIPT_KHAROSHTHI: + return getUcpKharoshthi(); + case CLASS_SCRIPT_KHMER: + return getUcpKhmer(); + case CLASS_SCRIPT_LAO: + return getUcpLao(); + case CLASS_SCRIPT_LATIN: + return getUcpLatin(); + case CLASS_SCRIPT_LEPCHA: + return getUcpLepcha(); + case CLASS_SCRIPT_LIMBU: + return getUcpLimbu(); + case CLASS_SCRIPT_LINEAR_B: + return getUcpLinear_B(); + case CLASS_SCRIPT_LISU: + return getUcpLisu(); + case CLASS_SCRIPT_LYCIAN: + return getUcpLycian(); + case CLASS_SCRIPT_LYDIAN: + return getUcpLydian(); + case CLASS_SCRIPT_MALAYALAM: + return getUcpMalayalam(); + case CLASS_SCRIPT_MANDAIC: + return getUcpMandaic(); + case CLASS_SCRIPT_MEETEI_MAYEK: + return getUcpMeetei_Mayek(); + case CLASS_SCRIPT_MONGOLIAN: + return getUcpMongolian(); + case CLASS_SCRIPT_MYANMAR: + return getUcpMyanmar(); + case CLASS_SCRIPT_NEW_TAI_LUE: + return getUcpNew_Tai_Lue(); + case CLASS_SCRIPT_NKO: + return getUcpNko(); + case CLASS_SCRIPT_OGHAM: + return getUcpOgham(); + case CLASS_SCRIPT_OL_CHIKI: + return getUcpOl_Chiki(); + case CLASS_SCRIPT_OLD_ITALIC: + return getUcpOld_Italic(); + case CLASS_SCRIPT_OLD_PERSIAN: + return getUcpOld_Persian(); + case CLASS_SCRIPT_OLD_SOUTH_ARABIAN: + return getUcpOld_South_Arabian(); + case CLASS_SCRIPT_OLD_TURKIC: + return getUcpOld_Turkic(); + case CLASS_SCRIPT_ORIYA: + return getUcpOriya(); + case CLASS_SCRIPT_OSMANYA: + return getUcpOsmanya(); + case CLASS_SCRIPT_PHAGS_PA: + return getUcpPhags_Pa(); + case CLASS_SCRIPT_PHOENICIAN: + return getUcpPhoenician(); + case CLASS_SCRIPT_REJANG: + return getUcpRejang(); + case CLASS_SCRIPT_RUNIC: + return getUcpRunic(); + case CLASS_SCRIPT_SAMARITAN: + return getUcpSamaritan(); + case CLASS_SCRIPT_SAURASHTRA: + return getUcpSaurashtra(); + case CLASS_SCRIPT_SHAVIAN: + return getUcpShavian(); + case CLASS_SCRIPT_SINHALA: + return getUcpSinhala(); + case CLASS_SCRIPT_SUNDANESE: + return getUcpSundanese(); + case CLASS_SCRIPT_SYLOTI_NAGRI: + return getUcpSyloti_Nagri(); + case CLASS_SCRIPT_SYRIAC: + return getUcpSyriac(); + case CLASS_SCRIPT_TAGALOG: + return getUcpTagalog(); + case CLASS_SCRIPT_TAGBANWA: + return getUcpTagbanwa(); + case CLASS_SCRIPT_TAI_LE: + return getUcpTai_Le(); + case CLASS_SCRIPT_TAI_THAM: + return getUcpTai_Tham(); + case CLASS_SCRIPT_TAI_VIET: + return getUcpTai_Viet(); + case CLASS_SCRIPT_TAMIL: + return getUcpTamil(); + case CLASS_SCRIPT_TELUGU: + return getUcpTelugu(); + case CLASS_SCRIPT_THAANA: + return getUcpThaana(); + case CLASS_SCRIPT_THAI: + return getUcpThai(); + case CLASS_SCRIPT_TIBETAN: + return getUcpTibetan(); + case CLASS_SCRIPT_TIFINAGH: + return getUcpTifinagh(); + case CLASS_SCRIPT_UGARITIC: + return getUcpUgaritic(); + case CLASS_SCRIPT_VAI: + return getUcpVai(); + case CLASS_SCRIPT_YI: + return getUcpYi(); + case CLASS_UCP_ANY: + return CodePointSet(CodePointSet::interval(0, MAX_UNICODE)); + + default: { /* currently uses ascii defns */ + CharReach cr = getPredefinedCharReach(c, mode); + CodePointSet rv; + for (u32 i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { + rv.set(i); + } + return rv; + } + } +} + +UTF8ComponentClass::UTF8ComponentClass(const ParseMode &mode_in) + : ComponentClass(mode_in), + single_pos( GlushkovBuildState::POS_UNINITIALIZED), + one_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), + two_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), + three_dot_trailer( GlushkovBuildState::POS_UNINITIALIZED), + two_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED), + three_char_dot_head(GlushkovBuildState::POS_UNINITIALIZED), + four_char_dot_head( GlushkovBuildState::POS_UNINITIALIZED) { + assert(mode.utf8); +} + +UTF8ComponentClass *UTF8ComponentClass::clone() const { + return new UTF8ComponentClass(*this); +} + +bool UTF8ComponentClass::class_empty(void) const { + assert(finalized); + return cps.none(); +} + +void UTF8ComponentClass::createRange(unichar to) { + assert(range_start != INVALID_UNICODE); + unichar from = range_start; + if (from > to) { + throw LocatedParseError("Range out of order in character class"); + } + + in_cand_range = false; + CodePointSet ncps; + ncps.setRange(from, to); + if (mode.caseless) { + make_caseless(&ncps); + } + cps |= ncps; + range_start = INVALID_UNICODE; +} + +void UTF8ComponentClass::add(PredefinedClass c, bool negative) { + if (in_cand_range) { // can't form a range here + throw LocatedParseError("Invalid range in character class"); + } + + if (mode.ucp) { + c = translateForUcpMode(c, mode); + } + + // caselessness is handled inside this call - don't apply make_caseless + // to the result + CodePointSet pcps = getPredefinedCodePointSet(c, mode); + if (negative) { + pcps.flip(); + } + + cps |= pcps; + + range_start = INVALID_UNICODE; + in_cand_range = false; +} + +void UTF8ComponentClass::add(unichar c) { + DEBUG_PRINTF("adding \\x%08x\n", c); + if (c > MAX_UNICODE) { // too big! + throw LocatedParseError("Hexadecimal value is greater than \\x10FFFF"); + } + + if (in_cand_range) { + createRange(c); + return; + } + + CodePointSet ncps; + ncps.set(c); + if (mode.caseless) { + make_caseless(&ncps); + } + cps |= ncps; + range_start = c; +} + +void UTF8ComponentClass::finalize() { + if (finalized) { + return; + } + + // Handle unclosed ranges, like '[a-]' and '[a-\Q\E]' -- in these cases the + // dash is a literal dash. + if (in_cand_range) { + cps.set('-'); + in_cand_range = false; + } + + if (m_negate) { + cps.flip(); + } + + finalized = true; +} + +Position UTF8ComponentClass::getHead(NFABuilder &builder, u8 first_byte) { + map<u8, Position>::const_iterator it = heads.find(first_byte); + if (it != heads.end()) { + return it->second; + } + + Position head = builder.makePositions(1); + assert(heads.find(first_byte) == heads.end()); + builder.addCharReach(head, CharReach(first_byte)); + /* no report id as head can not be directly wired to accept */ + + heads[first_byte] = head; + return head; +} + +void UTF8ComponentClass::ensureDotTrailer(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + if (one_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { + return; + } + + one_dot_trailer = builder.makePositions(1); + builder.setNodeReportID(one_dot_trailer, 0); + builder.addCharReach(one_dot_trailer, CharReach(0x80, 0xbf)); + tails.insert(one_dot_trailer); +} + +void UTF8ComponentClass::ensureTwoDotTrailer(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + if (two_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { + return; + } + + ensureDotTrailer(bs); + + two_dot_trailer = builder.makePositions(1); + builder.addCharReach(two_dot_trailer, CharReach(0x80, 0xbf)); + bs.addSuccessor(two_dot_trailer, one_dot_trailer); +} + +void UTF8ComponentClass::ensureThreeDotTrailer(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + if (three_dot_trailer != GlushkovBuildState::POS_UNINITIALIZED) { + return; + } + + ensureTwoDotTrailer(bs); + + three_dot_trailer = builder.makePositions(1); + builder.addCharReach(three_dot_trailer, CharReach(0x80, 0xbf)); + bs.addSuccessor(three_dot_trailer, two_dot_trailer); +} + +void UTF8ComponentClass::buildOneByte(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + for (CodePointSet::const_iterator it = cps.begin(); it != cps.end(); ++it) { + unichar b = lower(*it); + unichar e = upper(*it) + 1; + if (b >= UTF_2CHAR_MIN) { + continue; + } + + DEBUG_PRINTF("building vertices for [%u, %u)\n", b, e); + + if (single_pos == GlushkovBuildState::POS_UNINITIALIZED) { + single_pos = builder.makePositions(1); + builder.setNodeReportID(single_pos, 0 /* offset adj */); + tails.insert(single_pos); + } + CharReach cr(b, MIN(e, UTF_2CHAR_MIN) - 1); + builder.addCharReach(single_pos, cr); + } +} + +void UTF8ComponentClass::addToTail(GlushkovBuildState &bs, + map<Position, Position> &finals, + Position prev, unichar b, unichar e) { + NFABuilder &builder = bs.getBuilder(); + Position tail; + if (finals.find(prev) == finals.end()) { + tail = builder.makePositions(1); + builder.setNodeReportID(tail, 0 /* offset adj */); + bs.addSuccessor(prev, tail); + finals[prev] = tail; + tails.insert(tail); + } else { + tail = finals[prev]; + } + + u8 bb = makeContByte(b); + u8 ee = makeContByte(e - 1); + builder.addCharReach(tail, CharReach(bb, ee)); +} + +void UTF8ComponentClass::buildTwoByte(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + map<Position, Position> finals; + + for (auto it = cps.begin(); it != cps.end(); ++it) { + unichar b = lower(*it); + unichar e = upper(*it) + 1; + + b = MAX(b, UTF_2CHAR_MIN); + e = MIN(e, UTF_3CHAR_MIN); + + if (b >= e) { + continue; /* we're done here */ + } + + /* raise b to the start of the next tail byte boundary */ + if (b & UTF_CONT_BYTE_VALUE_MASK) { + unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); + u8 first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT); + assert(first_byte > 0xc1 && first_byte <= 0xdf); + + Position head = getHead(builder, first_byte); + addToTail(bs, finals, head, b, bb); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the end of a tail byte boundary */ + if (e & UTF_CONT_BYTE_VALUE_MASK) { + unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; + assert(ee >= b); + + u8 first_byte = UTF_TWO_BYTE_HEADER | (ee >> UTF_CONT_SHIFT); + assert(first_byte > 0xc1 && first_byte <= 0xdf); + + Position head = getHead(builder, first_byte); + addToTail(bs, finals, head, ee, e); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* middle section just goes to a common full vertex */ + ensureDotTrailer(bs); + + if (two_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { + two_char_dot_head = builder.makePositions(1); + bs.addSuccessor(two_char_dot_head, one_dot_trailer); + } + + u8 min_first_byte = UTF_TWO_BYTE_HEADER | (b >> UTF_CONT_SHIFT); + u8 max_first_byte = UTF_TWO_BYTE_HEADER | ((e - 1) >> UTF_CONT_SHIFT); + + assert(min_first_byte > 0xc1 && min_first_byte <= 0xdf); + assert(max_first_byte > 0xc1 && max_first_byte <= 0xdf); + + builder.addCharReach(two_char_dot_head, + CharReach(min_first_byte, max_first_byte)); + } +} + +static +Position getMid(GlushkovBuildState &bs, map<Position, map<u8, Position> > &mids, + const Position &prev, u8 byte_val) { + NFABuilder &builder = bs.getBuilder(); + map<u8, Position> &by_byte = mids[prev]; + + map<u8, Position>::const_iterator it = by_byte.find(byte_val); + if (it != by_byte.end()) { + return it->second; + } + + Position mid = builder.makePositions(1); + builder.addCharReach(mid, CharReach(byte_val)); + bs.addSuccessor(prev, mid); + /* no report id as mid can not be directly wired to accept */ + + by_byte[byte_val] = mid; + return mid; +} + +void UTF8ComponentClass::buildThreeByte(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + + map<Position, map<u8, Position> > mids; + map<Position, Position> finals; + + for (auto it = cps.begin(); it != cps.end(); ++it) { + unichar b = lower(*it); + unichar e = upper(*it) + 1; + + b = MAX(b, UTF_3CHAR_MIN); + e = MIN(e, UTF_4CHAR_MIN); + + if (b >= e) { + continue; /* we're done here */ + } + + /* raise b to the start of the next tail byte boundary */ + if (b & UTF_CONT_BYTE_VALUE_MASK) { + unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); + + u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT)); + assert(first_byte >= 0xe0 && first_byte <= 0xef); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(b >> UTF_CONT_SHIFT); + Position mid = getMid(bs, mids, head, second_byte); + + addToTail(bs, finals, mid, b, bb); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the end of a tail byte boundary */ + if (e & UTF_CONT_BYTE_VALUE_MASK) { + unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; + assert(ee >= b); + + u8 first_byte = UTF_THREE_BYTE_HEADER + | (ee >> (2 * UTF_CONT_SHIFT)); + assert(first_byte >= 0xe0 && first_byte <= 0xef); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(ee >> UTF_CONT_SHIFT); + Position mid = getMid(bs, mids, head, second_byte); + + addToTail(bs, finals, mid, ee, e); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* from here on in the last byte is always full */ + ensureDotTrailer(bs); + + /* raise b to the start of the next mid byte boundary */ + if (b & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) { + unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT))); + + u8 first_byte = UTF_THREE_BYTE_HEADER | (b >> (2 * UTF_CONT_SHIFT)); + Position head = getHead(builder, first_byte); + + Position mid = builder.makePositions(1); + bs.addSuccessor(head, mid); + bs.addSuccessor(mid, one_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 second_min = makeContByte(b >> UTF_CONT_SHIFT); + u8 second_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT); + + builder.addCharReach(mid, CharReach(second_min, second_max)); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the end of a mid byte boundary */ + if (e & ((1 << (2 * UTF_CONT_SHIFT)) - 1)) { + unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1); + assert(ee >= b); + + u8 first_byte = UTF_THREE_BYTE_HEADER + | (ee >> (2 * UTF_CONT_SHIFT)); + Position head = getHead(builder, first_byte); + + Position mid = builder.makePositions(1); + bs.addSuccessor(head, mid); + bs.addSuccessor(mid, one_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 second_min = makeContByte(ee >> UTF_CONT_SHIFT); + u8 second_max = makeContByte((e - 1) >> UTF_CONT_SHIFT); + + builder.addCharReach(mid, CharReach(second_min, second_max)); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* now we just have to wire head to a common dot trailer */ + ensureTwoDotTrailer(bs); + if (three_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { + three_char_dot_head = builder.makePositions(1); + bs.addSuccessor(three_char_dot_head, two_dot_trailer); + } + + u8 min_first_byte = UTF_THREE_BYTE_HEADER + | (b >> (2 * UTF_CONT_SHIFT)); + u8 max_first_byte = UTF_THREE_BYTE_HEADER + | ((e - 1) >> (2 * UTF_CONT_SHIFT)); + + assert(min_first_byte > 0xdf && min_first_byte <= 0xef); + assert(max_first_byte > 0xdf && max_first_byte <= 0xef); + + builder.addCharReach(three_char_dot_head, + CharReach(min_first_byte, max_first_byte)); + } +} + +static +u8 makeFirstByteOfFour(unichar raw) { + u8 first_byte = UTF_FOUR_BYTE_HEADER | (raw >> (3 * UTF_CONT_SHIFT)); + assert(first_byte > 0xef && first_byte <= 0xf7); + return first_byte; +} + +static +bool isTwoContAligned(unichar raw) { + return !(raw & ((1 << (2 * UTF_CONT_SHIFT)) - 1)); +} + +static +bool isThreeContAligned(unichar raw) { + return !(raw & ((1 << (3 * UTF_CONT_SHIFT)) - 1)); +} + +void UTF8ComponentClass::buildFourByte(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + map<Position, map<u8, Position> > mids; + map<Position, Position> finals; + + for (auto it = cps.begin(); it != cps.end(); ++it) { + unichar b = lower(*it); + unichar e = upper(*it) + 1; + + b = MAX(b, UTF_4CHAR_MIN); + e = MIN(e, MAX_UNICODE + 1); + + if (b >= e) { + continue; + } + + /* raise b to the start of the next tail byte boundary */ + if (b & UTF_CONT_BYTE_VALUE_MASK) { + unichar bb = MIN(e, ROUNDUP_N(b, UTF_CONT_BYTE_RANGE)); + + u8 first_byte = makeFirstByteOfFour(b); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT)); + Position mid1 = getMid(bs, mids, head, second_byte); + + u8 third_byte = makeContByte(b >> UTF_CONT_SHIFT); + Position mid2 = getMid(bs, mids, mid1, third_byte); + + addToTail(bs, finals, mid2, b, bb); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the end of a tail byte boundary */ + if (e & UTF_CONT_BYTE_VALUE_MASK) { + unichar ee = e & ~UTF_CONT_BYTE_VALUE_MASK; + assert(ee >= b); + + u8 first_byte = makeFirstByteOfFour(ee); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); + Position mid1 = getMid(bs, mids, head, second_byte); + + u8 third_byte = makeContByte(ee >> UTF_CONT_SHIFT); + Position mid2 = getMid(bs, mids, mid1, third_byte); + + addToTail(bs, finals, mid2, ee, e); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* from here on in the last byte is always full */ + ensureDotTrailer(bs); + + /* raise b to the start of the next mid byte boundary */ + if (!isTwoContAligned(b)) { + unichar bb = MIN(e, ROUNDUP_N(b, 1 << (2 * UTF_CONT_SHIFT))); + + u8 first_byte = makeFirstByteOfFour(b); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(b >> (2 * UTF_CONT_SHIFT)); + Position mid1 = getMid(bs, mids, head, second_byte); + + Position mid2 = builder.makePositions(1); + bs.addSuccessor(mid1, mid2); + bs.addSuccessor(mid2, one_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 byte_min = makeContByte(b >> UTF_CONT_SHIFT); + u8 byte_max = makeContByte((bb - 1) >> UTF_CONT_SHIFT); + + builder.addCharReach(mid2, CharReach(byte_min, byte_max)); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the end of a mid byte boundary */ + if (!isTwoContAligned(e)) { + unichar ee = e & ~((1 << (2 * UTF_CONT_SHIFT)) - 1); + assert(ee >= b); + + u8 first_byte = makeFirstByteOfFour(ee); + Position head = getHead(builder, first_byte); + + u8 second_byte = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); + Position mid1 = getMid(bs, mids, head, second_byte); + + Position mid2 = builder.makePositions(1); + bs.addSuccessor(mid1, mid2); + bs.addSuccessor(mid2, one_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 byte_min = makeContByte(ee >> UTF_CONT_SHIFT); + u8 byte_max = makeContByte((e - 1) >> UTF_CONT_SHIFT); + + builder.addCharReach(mid2, CharReach(byte_min, byte_max)); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + ensureTwoDotTrailer(bs); + + /* raise b to the next byte boundary */ + if (!isThreeContAligned(b)) { + unichar bb = MIN(e, ROUNDUP_N(b, 1 << (3 * UTF_CONT_SHIFT))); + + u8 first_byte = makeFirstByteOfFour(b); + Position head = getHead(builder, first_byte); + + Position mid1 = builder.makePositions(1); + bs.addSuccessor(head, mid1); + bs.addSuccessor(mid1, two_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 byte_min = makeContByte(b >> (2 * UTF_CONT_SHIFT)); + u8 byte_max = makeContByte((bb - 1) >> (2 * UTF_CONT_SHIFT)); + + builder.addCharReach(mid1, CharReach(byte_min, byte_max)); + + b = bb; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* lower e to the next byte boundary */ + if (!isThreeContAligned(e)) { + unichar ee = e & ~((1 << (3 * UTF_CONT_SHIFT)) - 1); + assert(ee >= b); + + u8 first_byte = makeFirstByteOfFour(ee); + Position head = getHead(builder, first_byte); + Position mid1 = builder.makePositions(1); + bs.addSuccessor(head, mid1); + bs.addSuccessor(mid1, two_dot_trailer); + /* no report id as mid can not be directly wired to accept, + * not adding to mids as we are completely filling its downstream */ + u8 byte_min = makeContByte(ee >> (2 * UTF_CONT_SHIFT)); + u8 byte_max = makeContByte((e - 1) >> (2 * UTF_CONT_SHIFT)); + + builder.addCharReach(mid1, CharReach(byte_min, byte_max)); + + e = ee; + } + + if (b == e) { + continue; /* we're done here */ + } + assert(b < e); + + /* now we just have to wire head to a common dot trailer */ + ensureThreeDotTrailer(bs); + if (four_char_dot_head == GlushkovBuildState::POS_UNINITIALIZED) { + four_char_dot_head = builder.makePositions(1); + bs.addSuccessor(four_char_dot_head, three_dot_trailer); + } + + u8 min_first_byte = makeFirstByteOfFour(b); + u8 max_first_byte = makeFirstByteOfFour(e - 1); + + builder.addCharReach(four_char_dot_head, + CharReach(min_first_byte, max_first_byte)); + } +} + +void UTF8ComponentClass::notePositions(GlushkovBuildState &bs) { + // We should always be finalized by now. + assert(finalized); + + // An empty class is a special case; this would be generated by something + // like /[\s\S]/8, which can never match. We treat these like we do the non + // UTF-8 version: add a vertex with empty reach (to ensure we create a + // connected graph) and pick it up later on. + if (class_empty()) { + DEBUG_PRINTF("empty class!\n"); + assert(single_pos == GlushkovBuildState::POS_UNINITIALIZED); + NFABuilder &builder = bs.getBuilder(); + single_pos = builder.makePositions(1); + builder.setNodeReportID(single_pos, 0 /* offset adj */); + builder.addCharReach(single_pos, CharReach()); + tails.insert(single_pos); + return; + } + + buildOneByte(bs); + buildTwoByte(bs); + buildThreeByte(bs); + buildFourByte(bs); +} + +void UTF8ComponentClass::buildFollowSet(GlushkovBuildState &, + const vector<PositionInfo> &) { + /* states are wired in notePositions as all belong to this component. */ +} + +vector<PositionInfo> UTF8ComponentClass::first(void) const { + vector<PositionInfo> rv; + if (single_pos != GlushkovBuildState::POS_UNINITIALIZED) { + rv.push_back(single_pos); + } + if (two_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { + rv.push_back(two_char_dot_head); + } + if (three_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { + rv.push_back(three_char_dot_head); + } + if (four_char_dot_head != GlushkovBuildState::POS_UNINITIALIZED) { + rv.push_back(four_char_dot_head); + } + + for (auto it = heads.begin(); it != heads.end(); ++it) { + rv.push_back(it->second); + } + return rv; +} + +vector<PositionInfo> UTF8ComponentClass::last(void) const { + vector<PositionInfo> rv; + + rv.insert(rv.end(), tails.begin(), tails.end()); + return rv; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h index 72c16b0095..f4e7ea328d 100644 --- a/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h +++ b/contrib/libs/hyperscan/src/parser/Utf8ComponentClass.h @@ -1,116 +1,116 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Character class in UTF-8 mode. - */ - -#ifndef UTF8_COMPONENT_CLASS_H -#define UTF8_COMPONENT_CLASS_H - -#include "ComponentClass.h" -#include "ue2common.h" -#include "util/unicode_set.h" - -#include <map> -#include <set> -#include <string> -#include <vector> - -namespace ue2 { - -class UTF8ComponentClass : public ComponentClass { - friend class DumpVisitor; - friend class PrintVisitor; - friend class CaselessVisitor; - friend class SimplifyVisitor; - friend class SimplifyCandidatesVisitor; -public: - explicit UTF8ComponentClass(const ParseMode &mode); - ~UTF8ComponentClass() override {} - UTF8ComponentClass *clone() const override; - - Component *accept(ComponentVisitor &v) override { - Component *c = v.visit(this); - v.post(this); - return c; - } - - void accept(ConstComponentVisitor &v) const override { - v.pre(*this); - v.during(*this); - v.post(*this); - } - - bool class_empty(void) const override; - void add(PredefinedClass c, bool negative) override; - void add(unichar c) override; - void finalize(void) override; - void notePositions(GlushkovBuildState &bs) override; - void buildFollowSet(GlushkovBuildState &bs, - const std::vector<PositionInfo> &) override; - std::vector<PositionInfo> first(void) const override; - std::vector<PositionInfo> last(void) const override; - -protected: - void createRange(unichar to) override; - -private: - Position getHead(NFABuilder &builder, u8 first_byte); - void addToTail(GlushkovBuildState &bs, std::map<Position, Position> &finals, - Position prev, unichar b, unichar e); - void ensureDotTrailer(GlushkovBuildState &bs); - void ensureTwoDotTrailer(GlushkovBuildState &bs); - void ensureThreeDotTrailer(GlushkovBuildState &bs); - void buildOneByte(GlushkovBuildState &bs); - void buildTwoByte(GlushkovBuildState &bs); - void buildThreeByte(GlushkovBuildState &bs); - void buildFourByte(GlushkovBuildState &bs); - - CodePointSet cps; - - std::map<u8, Position> heads; - Position single_pos; - Position one_dot_trailer; - Position two_dot_trailer; - Position three_dot_trailer; - - Position two_char_dot_head; - Position three_char_dot_head; - Position four_char_dot_head; - std::set<Position> tails; -}; - -PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); - -CodePointSet getPredefinedCodePointSet(PredefinedClass c, - const ParseMode &mode); - -} // namespace - -#endif // UTF8_COMPONENT_CLASS_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Character class in UTF-8 mode. + */ + +#ifndef UTF8_COMPONENT_CLASS_H +#define UTF8_COMPONENT_CLASS_H + +#include "ComponentClass.h" +#include "ue2common.h" +#include "util/unicode_set.h" + +#include <map> +#include <set> +#include <string> +#include <vector> + +namespace ue2 { + +class UTF8ComponentClass : public ComponentClass { + friend class DumpVisitor; + friend class PrintVisitor; + friend class CaselessVisitor; + friend class SimplifyVisitor; + friend class SimplifyCandidatesVisitor; +public: + explicit UTF8ComponentClass(const ParseMode &mode); + ~UTF8ComponentClass() override {} + UTF8ComponentClass *clone() const override; + + Component *accept(ComponentVisitor &v) override { + Component *c = v.visit(this); + v.post(this); + return c; + } + + void accept(ConstComponentVisitor &v) const override { + v.pre(*this); + v.during(*this); + v.post(*this); + } + + bool class_empty(void) const override; + void add(PredefinedClass c, bool negative) override; + void add(unichar c) override; + void finalize(void) override; + void notePositions(GlushkovBuildState &bs) override; + void buildFollowSet(GlushkovBuildState &bs, + const std::vector<PositionInfo> &) override; + std::vector<PositionInfo> first(void) const override; + std::vector<PositionInfo> last(void) const override; + +protected: + void createRange(unichar to) override; + +private: + Position getHead(NFABuilder &builder, u8 first_byte); + void addToTail(GlushkovBuildState &bs, std::map<Position, Position> &finals, + Position prev, unichar b, unichar e); + void ensureDotTrailer(GlushkovBuildState &bs); + void ensureTwoDotTrailer(GlushkovBuildState &bs); + void ensureThreeDotTrailer(GlushkovBuildState &bs); + void buildOneByte(GlushkovBuildState &bs); + void buildTwoByte(GlushkovBuildState &bs); + void buildThreeByte(GlushkovBuildState &bs); + void buildFourByte(GlushkovBuildState &bs); + + CodePointSet cps; + + std::map<u8, Position> heads; + Position single_pos; + Position one_dot_trailer; + Position two_dot_trailer; + Position three_dot_trailer; + + Position two_char_dot_head; + Position three_char_dot_head; + Position four_char_dot_head; + std::set<Position> tails; +}; + +PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode); + +CodePointSet getPredefinedCodePointSet(PredefinedClass c, + const ParseMode &mode); + +} // namespace + +#endif // UTF8_COMPONENT_CLASS_H diff --git a/contrib/libs/hyperscan/src/parser/buildstate.cpp b/contrib/libs/hyperscan/src/parser/buildstate.cpp index 3e2cbd9a39..75cfbb7b2d 100644 --- a/contrib/libs/hyperscan/src/parser/buildstate.cpp +++ b/contrib/libs/hyperscan/src/parser/buildstate.cpp @@ -1,529 +1,529 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Glushkov construction. - */ -#include "buildstate.h" -#include "position.h" -#include "position_dump.h" -#include "position_info.h" -#include "parse_error.h" -#include "hs_internal.h" -#include "ue2common.h" -#include "nfagraph/ng_builder.h" -#include "util/charreach.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Glushkov construction. + */ +#include "buildstate.h" +#include "position.h" +#include "position_dump.h" +#include "position_info.h" +#include "parse_error.h" +#include "hs_internal.h" +#include "ue2common.h" +#include "nfagraph/ng_builder.h" +#include "util/charreach.h" +#include "util/container.h" #include "util/flat_containers.h" #include "util/hash.h" -#include "util/make_unique.h" +#include "util/make_unique.h" #include "util/unordered.h" - -#include <algorithm> -#include <iterator> -#include <limits> -#include <map> -#include <utility> - -#if defined(DEBUG) || defined(DUMP_SUPPORT) -#include <ostream> -#include <sstream> -#endif - -using namespace std; - -namespace ue2 { - -/** \brief Represents an uninitialized state. */ -const Position GlushkovBuildState::POS_UNINITIALIZED = - numeric_limits<Position>::max(); - -/** \brief Represents an epsilon transition in the firsts of a component. */ -const Position GlushkovBuildState::POS_EPSILON = - numeric_limits<Position>::max() - 1; - -GlushkovBuildState::~GlushkovBuildState() { } - -namespace /* anonymous */ { - -class CheckPositionFlags { -public: - explicit CheckPositionFlags(int fl) : flags(fl) {} - bool operator()(const PositionInfo &p) const { - return (p.flags & flags) == flags; - } -private: - int flags; -}; - -class CheckUnflaggedEpsilon { -public: - bool operator()(const PositionInfo &p) const { - return p.pos == GlushkovBuildState::POS_EPSILON && p.flags == 0; - } -}; - -/** \brief Concrete impl of the GlushkovBuildState interface. */ -class GlushkovBuildStateImpl : public GlushkovBuildState { -public: - GlushkovBuildStateImpl(NFABuilder &b, bool prefilter); - - /** \brief Returns a reference to the NFABuilder being used. */ - NFABuilder &getBuilder() override { return builder; } - - /** \brief Returns a const reference to the NFABuilder being used. */ - const NFABuilder &getBuilder() const override { return builder; } - - /** \brief Wire up the lasts of one component to the firsts of another. */ - void connectRegions(const vector<PositionInfo> &lasts, - const vector<PositionInfo> &firsts) override; - - /** \brief Wire the lasts of the main sequence to accepts. */ - void connectAccepts(const vector<PositionInfo> &lasts) override; - - /** \brief Wire up a single last to a list of firsts. */ - void connectSuccessors(const PositionInfo &last, - vector<PositionInfo> firsts); - - /** Wire up a pair of positions. */ - void addSuccessor(Position from, Position to) override; - - /** \brief Clone the vertex properties and edges of all vertices between - * two positions. */ - void cloneFollowSet(Position from, Position to, unsigned offset) override; - - /** \brief Build the prioritised list of edges out of our successor map. */ - void buildEdges() override; - - /** Construct an edge, called internally by \ref buildEdges. */ - void buildEdge(Position from, const PositionInfo &to); - - Position startState; - Position startDotstarState; - Position acceptState; - Position acceptEodState; - Position acceptNlEodState; - Position acceptNlState; - - NFABuilder &builder; //!< \brief builder for the NFAGraph - - bool doPrefilter; //!< \brief we're building a prefiltering pattern - - /** \brief Map storing successors for each position. */ - map<Position, flat_set<PositionInfo>> successors; -}; - -} // namespace - -GlushkovBuildStateImpl::GlushkovBuildStateImpl(NFABuilder &b, - bool prefilter) : - startState(b.getStart()), - startDotstarState(b.getStartDotStar()), - acceptState(b.getAccept()), - acceptEodState(b.getAcceptEOD()), - acceptNlEodState(POS_UNINITIALIZED), - acceptNlState(POS_UNINITIALIZED), - builder(b), - doPrefilter(prefilter) -{ - // Our special nodes need special relationships. - vector<PositionInfo> lasts, firsts; - - // start->startDs and startDs self-loop. - lasts.push_back(startState); - lasts.push_back(startDotstarState); - firsts.push_back(startDotstarState); - connectRegions(lasts, firsts); - - // accept to acceptEod edges already wired - - // XXX: a small hack to support vacuous NFAs: give start and startDs an - // initial report ID. - builder.setNodeReportID(startState, 0); - builder.setNodeReportID(startDotstarState, 0); -} - -static -void checkEmbeddedEndAnchor(const PositionInfo &from, - const vector<PositionInfo> &firsts) { - if (!(from.flags & POS_FLAG_ONLY_ENDS)) { - return; - } - - for (const auto &first : firsts) { - if (first.pos != GlushkovBuildStateImpl::POS_EPSILON) { - /* can make it through the parse tree */ - throw ParseError("Embedded end anchors not supported."); - } - } -} - -// Wire up the lasts of one component to the firsts of another -void -GlushkovBuildStateImpl::connectRegions(const vector<PositionInfo> &lasts, - const vector<PositionInfo> &firsts) { - for (const auto &last : lasts) { - checkEmbeddedEndAnchor(last, firsts); - connectSuccessors(last, firsts); - } -} - -static -void filterEdges(const GlushkovBuildStateImpl &bs, const PositionInfo &from, - vector<PositionInfo> &tolist) { - if (from.pos == bs.startDotstarState) { - // If we're connecting from start-dotstar, remove all caret flavoured - // positions. - CheckPositionFlags check(POS_FLAG_NOFLOAT); - tolist.erase(remove_if(tolist.begin(), tolist.end(), check), - tolist.end()); - if (from.flags & POS_FLAG_NOFLOAT) { - tolist.clear(); - } - } else if (from.pos == bs.startState) { - // If we're connecting from start, we should remove any epsilons that - // aren't caret flavoured. - CheckUnflaggedEpsilon check; - tolist.erase(remove_if(tolist.begin(), tolist.end(), check), - tolist.end()); - CheckPositionFlags check2(POS_FLAG_MUST_FLOAT | POS_FLAG_NOFLOAT); - tolist.erase(remove_if(tolist.begin(), tolist.end(), check2), - tolist.end()); - } - - if (bs.builder.getAssertFlag(from.pos) & POS_FLAG_MULTILINE_START) { - // If we have a (mildly boneheaded) pattern like /^$/m, we're right up - // against the edge of what we can do without true assertion support. - // Here we have an evil hack to prevent us plugging the \n generated by - // the caret right into acceptEod (which is in the firsts of the - // dollar). - /* This is due to the 'interesting quirk' that multiline ^ does not - * not match a newline at the end of buffer. */ - DEBUG_PRINTF("multiline start - no eod\n"); - tolist.erase(remove(tolist.begin(), tolist.end(), bs.acceptEodState), - tolist.end()); - } -} - -static -Position makeNewlineAssertPos(GlushkovBuildState &bs) { - NFABuilder &builder = bs.getBuilder(); - Position newline = builder.makePositions(1); - builder.addCharReach(newline, CharReach('\n')); - builder.setAssertFlag(newline, POS_FLAG_FIDDLE_ACCEPT); - builder.setNodeReportID(newline, -1); - return newline; -} - -static -void generateAccepts(GlushkovBuildStateImpl &bs, const PositionInfo &from, - vector<PositionInfo> *tolist) { - NFABuilder &builder = bs.getBuilder(); - u32 flags = from.flags; - - bool require_eod = flags & POS_FLAG_WIRE_EOD; - bool require_nl_eod = flags & POS_FLAG_WIRE_NL_EOD - && !(flags & POS_FLAG_NO_NL_EOD); - bool require_nl_accept = (flags & POS_FLAG_WIRE_NL_ACCEPT) - && !(flags & POS_FLAG_NO_NL_ACCEPT); - - bool require_accept = !(flags & POS_FLAG_ONLY_ENDS); - - if (require_eod) { - tolist->push_back(bs.acceptEodState); - } - - if (require_nl_accept) { - if (bs.acceptNlState == GlushkovBuildState::POS_UNINITIALIZED) { - Position newline = makeNewlineAssertPos(bs); - bs.addSuccessor(newline, builder.getAccept()); - bs.acceptNlState = newline; - } - tolist->push_back(bs.acceptNlState); - } - - if (require_nl_eod) { - if (bs.acceptNlEodState == GlushkovBuildState::POS_UNINITIALIZED) { - Position newline = makeNewlineAssertPos(bs); - bs.addSuccessor(newline, builder.getAcceptEOD()); - bs.acceptNlEodState = newline; - } - tolist->push_back(bs.acceptNlEodState); - } - - if (require_accept) { - tolist->push_back(bs.acceptState); - } -} - -void GlushkovBuildStateImpl::connectAccepts(const vector<PositionInfo> &lasts) { - for (const auto &last : lasts) { - vector<PositionInfo> accepts; - generateAccepts(*this, last, &accepts); - connectSuccessors(last, accepts); - } -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) - -static UNUSED -string dumpCaptures(const PositionInfo &p) { - ostringstream oss; - - if (p.flags & POS_FLAG_NOFLOAT) { - oss << "<nofloat>"; - } - if (p.flags & POS_FLAG_MUST_FLOAT) { - oss << "<must_float>"; - } - if (p.flags & POS_FLAG_FIDDLE_ACCEPT) { - oss << "<fiddle_accept>"; - } - if (p.flags & POS_FLAG_ONLY_ENDS) { - oss << "<only_ends>"; - } - if (p.flags & POS_FLAG_NO_NL_EOD) { - oss << "<no_nl_eod>"; - } - if (p.flags & POS_FLAG_NO_NL_ACCEPT) { - oss << "<no_nl_acc>"; - } - - return oss.str(); -} - -#endif // DEBUG || DUMP_SUPPORT - -void GlushkovBuildStateImpl::connectSuccessors(const PositionInfo &from, - vector<PositionInfo> tolist) { - /* note: tolist maybe modified for our own internal use -> not a reference */ - assert(from.pos != POS_EPSILON); - assert(from.pos != POS_UNINITIALIZED); - assert(find(tolist.begin(), tolist.end(), POS_UNINITIALIZED) - == tolist.end()); - - DEBUG_PRINTF("FROM = %u%s TO = %s\n", from.pos, dumpCaptures(from).c_str(), - dumpPositions(tolist.begin(), tolist.end()).c_str()); - - /* prevent creation of edges with invalid assertions */ - filterEdges(*this, from, tolist); - - if (from.flags & POS_FLAG_FIDDLE_ACCEPT) { - auto accept = find(tolist.begin(), tolist.end(), acceptState); - if (accept != tolist.end()) { - DEBUG_PRINTF("accept through -1 offset-adjusting dot\n"); - Position fakedot = builder.makePositions(1); - builder.addCharReach(fakedot, CharReach(0x00, 0xff)); - builder.setNodeReportID(fakedot, -1); - addSuccessor(fakedot, acceptState); - *accept = fakedot; - } else { - // We might lead to accept via an assertion vertex, so we add the - // offset adj to this vertex itself. Used for cases like /^\B/m, - // which should match only at 0 for '\n'. - builder.setNodeReportID(from.pos, -1); - } - - assert(find(tolist.begin(), tolist.end(), acceptState) == tolist.end()); - } - - auto &succ = successors[from.pos]; - - DEBUG_PRINTF("connect %u -> %s\n", from.pos, - dumpPositions(tolist.begin(), tolist.end()).c_str()); - DEBUG_PRINTF("%u curr succ: %s\n", from.pos, - dumpPositions(begin(succ), end(succ)).c_str()); - - for (const auto &to : tolist) { - if (to.pos != POS_EPSILON) { - succ.insert(to); - } - } - - DEBUG_PRINTF("%u succ: %s\n", from.pos, - dumpPositions(begin(succ), end(succ)).c_str()); -} - -void GlushkovBuildStateImpl::addSuccessor(Position from, Position to) { - DEBUG_PRINTF("connect %u -> %u\n", from, to); - assert(from != POS_EPSILON && from != POS_UNINITIALIZED); - assert(to != POS_EPSILON && to != POS_UNINITIALIZED); - - auto &succ = successors[from]; - succ.insert(to); - - DEBUG_PRINTF("%u succ: %s\n", from, - dumpPositions(begin(succ), end(succ)).c_str()); -} - -void GlushkovBuildStateImpl::cloneFollowSet(Position first, Position last, - unsigned offset) { - assert(first <= last); - - // Clone vertex properties (reachability, etc) - builder.cloneRegion(first, last, offset); - - /* Clone the successors of all the positions between first and last - * inclusive, producing a new set of positions starting at (first + - * offset). */ - for (Position i = first; i <= last; i++) { - // This should be a new position. - assert(successors[i + offset].empty()); - - for (const PositionInfo &to : successors[i]) { - if (to.pos >= first && to.pos <= last) { - PositionInfo clone(to); - clone.pos += offset; - DEBUG_PRINTF("clone: %u -> %u\n", i + offset, clone.pos); - successors[i + offset].insert(clone); - } else { - // There shouldn't be any stray edges leading out of this - // region! - assert(0); - } - } - } -} - -void GlushkovBuildStateImpl::buildEdge(Position from, const PositionInfo &to) { - // Guard against embedded anchors - if (to == startState) { - /* can make it through the parse tree */ - throw ParseError("Embedded start anchors not supported."); - } - - assert(to.pos != POS_UNINITIALIZED); - assert(to.pos != POS_EPSILON); - - if (builder.hasEdge(from, to.pos)) { - return; - } - - builder.addEdge(from, to.pos); -} - -void GlushkovBuildStateImpl::buildEdges() { - // Create all the edges and track which vertices are asserts which need to - // be removed later. - for (const auto &m : successors) { - const Position from = m.first; - for (const auto &to : m.second) { - buildEdge(from, to); - } - } -} - -// Construct a usable GlushkovBuildState for the outside world. -unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b, - bool prefilter) { - return ue2::make_unique<GlushkovBuildStateImpl>(b, prefilter); -} - -// free functions for utility use - -/** \brief Eliminate lower-priority duplicate PositionInfo entries. - * - * Scans through a list of positions and retains only the highest priority - * version of a given (position, flags) entry. */ -void cleanupPositions(vector<PositionInfo> &a) { + +#include <algorithm> +#include <iterator> +#include <limits> +#include <map> +#include <utility> + +#if defined(DEBUG) || defined(DUMP_SUPPORT) +#include <ostream> +#include <sstream> +#endif + +using namespace std; + +namespace ue2 { + +/** \brief Represents an uninitialized state. */ +const Position GlushkovBuildState::POS_UNINITIALIZED = + numeric_limits<Position>::max(); + +/** \brief Represents an epsilon transition in the firsts of a component. */ +const Position GlushkovBuildState::POS_EPSILON = + numeric_limits<Position>::max() - 1; + +GlushkovBuildState::~GlushkovBuildState() { } + +namespace /* anonymous */ { + +class CheckPositionFlags { +public: + explicit CheckPositionFlags(int fl) : flags(fl) {} + bool operator()(const PositionInfo &p) const { + return (p.flags & flags) == flags; + } +private: + int flags; +}; + +class CheckUnflaggedEpsilon { +public: + bool operator()(const PositionInfo &p) const { + return p.pos == GlushkovBuildState::POS_EPSILON && p.flags == 0; + } +}; + +/** \brief Concrete impl of the GlushkovBuildState interface. */ +class GlushkovBuildStateImpl : public GlushkovBuildState { +public: + GlushkovBuildStateImpl(NFABuilder &b, bool prefilter); + + /** \brief Returns a reference to the NFABuilder being used. */ + NFABuilder &getBuilder() override { return builder; } + + /** \brief Returns a const reference to the NFABuilder being used. */ + const NFABuilder &getBuilder() const override { return builder; } + + /** \brief Wire up the lasts of one component to the firsts of another. */ + void connectRegions(const vector<PositionInfo> &lasts, + const vector<PositionInfo> &firsts) override; + + /** \brief Wire the lasts of the main sequence to accepts. */ + void connectAccepts(const vector<PositionInfo> &lasts) override; + + /** \brief Wire up a single last to a list of firsts. */ + void connectSuccessors(const PositionInfo &last, + vector<PositionInfo> firsts); + + /** Wire up a pair of positions. */ + void addSuccessor(Position from, Position to) override; + + /** \brief Clone the vertex properties and edges of all vertices between + * two positions. */ + void cloneFollowSet(Position from, Position to, unsigned offset) override; + + /** \brief Build the prioritised list of edges out of our successor map. */ + void buildEdges() override; + + /** Construct an edge, called internally by \ref buildEdges. */ + void buildEdge(Position from, const PositionInfo &to); + + Position startState; + Position startDotstarState; + Position acceptState; + Position acceptEodState; + Position acceptNlEodState; + Position acceptNlState; + + NFABuilder &builder; //!< \brief builder for the NFAGraph + + bool doPrefilter; //!< \brief we're building a prefiltering pattern + + /** \brief Map storing successors for each position. */ + map<Position, flat_set<PositionInfo>> successors; +}; + +} // namespace + +GlushkovBuildStateImpl::GlushkovBuildStateImpl(NFABuilder &b, + bool prefilter) : + startState(b.getStart()), + startDotstarState(b.getStartDotStar()), + acceptState(b.getAccept()), + acceptEodState(b.getAcceptEOD()), + acceptNlEodState(POS_UNINITIALIZED), + acceptNlState(POS_UNINITIALIZED), + builder(b), + doPrefilter(prefilter) +{ + // Our special nodes need special relationships. + vector<PositionInfo> lasts, firsts; + + // start->startDs and startDs self-loop. + lasts.push_back(startState); + lasts.push_back(startDotstarState); + firsts.push_back(startDotstarState); + connectRegions(lasts, firsts); + + // accept to acceptEod edges already wired + + // XXX: a small hack to support vacuous NFAs: give start and startDs an + // initial report ID. + builder.setNodeReportID(startState, 0); + builder.setNodeReportID(startDotstarState, 0); +} + +static +void checkEmbeddedEndAnchor(const PositionInfo &from, + const vector<PositionInfo> &firsts) { + if (!(from.flags & POS_FLAG_ONLY_ENDS)) { + return; + } + + for (const auto &first : firsts) { + if (first.pos != GlushkovBuildStateImpl::POS_EPSILON) { + /* can make it through the parse tree */ + throw ParseError("Embedded end anchors not supported."); + } + } +} + +// Wire up the lasts of one component to the firsts of another +void +GlushkovBuildStateImpl::connectRegions(const vector<PositionInfo> &lasts, + const vector<PositionInfo> &firsts) { + for (const auto &last : lasts) { + checkEmbeddedEndAnchor(last, firsts); + connectSuccessors(last, firsts); + } +} + +static +void filterEdges(const GlushkovBuildStateImpl &bs, const PositionInfo &from, + vector<PositionInfo> &tolist) { + if (from.pos == bs.startDotstarState) { + // If we're connecting from start-dotstar, remove all caret flavoured + // positions. + CheckPositionFlags check(POS_FLAG_NOFLOAT); + tolist.erase(remove_if(tolist.begin(), tolist.end(), check), + tolist.end()); + if (from.flags & POS_FLAG_NOFLOAT) { + tolist.clear(); + } + } else if (from.pos == bs.startState) { + // If we're connecting from start, we should remove any epsilons that + // aren't caret flavoured. + CheckUnflaggedEpsilon check; + tolist.erase(remove_if(tolist.begin(), tolist.end(), check), + tolist.end()); + CheckPositionFlags check2(POS_FLAG_MUST_FLOAT | POS_FLAG_NOFLOAT); + tolist.erase(remove_if(tolist.begin(), tolist.end(), check2), + tolist.end()); + } + + if (bs.builder.getAssertFlag(from.pos) & POS_FLAG_MULTILINE_START) { + // If we have a (mildly boneheaded) pattern like /^$/m, we're right up + // against the edge of what we can do without true assertion support. + // Here we have an evil hack to prevent us plugging the \n generated by + // the caret right into acceptEod (which is in the firsts of the + // dollar). + /* This is due to the 'interesting quirk' that multiline ^ does not + * not match a newline at the end of buffer. */ + DEBUG_PRINTF("multiline start - no eod\n"); + tolist.erase(remove(tolist.begin(), tolist.end(), bs.acceptEodState), + tolist.end()); + } +} + +static +Position makeNewlineAssertPos(GlushkovBuildState &bs) { + NFABuilder &builder = bs.getBuilder(); + Position newline = builder.makePositions(1); + builder.addCharReach(newline, CharReach('\n')); + builder.setAssertFlag(newline, POS_FLAG_FIDDLE_ACCEPT); + builder.setNodeReportID(newline, -1); + return newline; +} + +static +void generateAccepts(GlushkovBuildStateImpl &bs, const PositionInfo &from, + vector<PositionInfo> *tolist) { + NFABuilder &builder = bs.getBuilder(); + u32 flags = from.flags; + + bool require_eod = flags & POS_FLAG_WIRE_EOD; + bool require_nl_eod = flags & POS_FLAG_WIRE_NL_EOD + && !(flags & POS_FLAG_NO_NL_EOD); + bool require_nl_accept = (flags & POS_FLAG_WIRE_NL_ACCEPT) + && !(flags & POS_FLAG_NO_NL_ACCEPT); + + bool require_accept = !(flags & POS_FLAG_ONLY_ENDS); + + if (require_eod) { + tolist->push_back(bs.acceptEodState); + } + + if (require_nl_accept) { + if (bs.acceptNlState == GlushkovBuildState::POS_UNINITIALIZED) { + Position newline = makeNewlineAssertPos(bs); + bs.addSuccessor(newline, builder.getAccept()); + bs.acceptNlState = newline; + } + tolist->push_back(bs.acceptNlState); + } + + if (require_nl_eod) { + if (bs.acceptNlEodState == GlushkovBuildState::POS_UNINITIALIZED) { + Position newline = makeNewlineAssertPos(bs); + bs.addSuccessor(newline, builder.getAcceptEOD()); + bs.acceptNlEodState = newline; + } + tolist->push_back(bs.acceptNlEodState); + } + + if (require_accept) { + tolist->push_back(bs.acceptState); + } +} + +void GlushkovBuildStateImpl::connectAccepts(const vector<PositionInfo> &lasts) { + for (const auto &last : lasts) { + vector<PositionInfo> accepts; + generateAccepts(*this, last, &accepts); + connectSuccessors(last, accepts); + } +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) + +static UNUSED +string dumpCaptures(const PositionInfo &p) { + ostringstream oss; + + if (p.flags & POS_FLAG_NOFLOAT) { + oss << "<nofloat>"; + } + if (p.flags & POS_FLAG_MUST_FLOAT) { + oss << "<must_float>"; + } + if (p.flags & POS_FLAG_FIDDLE_ACCEPT) { + oss << "<fiddle_accept>"; + } + if (p.flags & POS_FLAG_ONLY_ENDS) { + oss << "<only_ends>"; + } + if (p.flags & POS_FLAG_NO_NL_EOD) { + oss << "<no_nl_eod>"; + } + if (p.flags & POS_FLAG_NO_NL_ACCEPT) { + oss << "<no_nl_acc>"; + } + + return oss.str(); +} + +#endif // DEBUG || DUMP_SUPPORT + +void GlushkovBuildStateImpl::connectSuccessors(const PositionInfo &from, + vector<PositionInfo> tolist) { + /* note: tolist maybe modified for our own internal use -> not a reference */ + assert(from.pos != POS_EPSILON); + assert(from.pos != POS_UNINITIALIZED); + assert(find(tolist.begin(), tolist.end(), POS_UNINITIALIZED) + == tolist.end()); + + DEBUG_PRINTF("FROM = %u%s TO = %s\n", from.pos, dumpCaptures(from).c_str(), + dumpPositions(tolist.begin(), tolist.end()).c_str()); + + /* prevent creation of edges with invalid assertions */ + filterEdges(*this, from, tolist); + + if (from.flags & POS_FLAG_FIDDLE_ACCEPT) { + auto accept = find(tolist.begin(), tolist.end(), acceptState); + if (accept != tolist.end()) { + DEBUG_PRINTF("accept through -1 offset-adjusting dot\n"); + Position fakedot = builder.makePositions(1); + builder.addCharReach(fakedot, CharReach(0x00, 0xff)); + builder.setNodeReportID(fakedot, -1); + addSuccessor(fakedot, acceptState); + *accept = fakedot; + } else { + // We might lead to accept via an assertion vertex, so we add the + // offset adj to this vertex itself. Used for cases like /^\B/m, + // which should match only at 0 for '\n'. + builder.setNodeReportID(from.pos, -1); + } + + assert(find(tolist.begin(), tolist.end(), acceptState) == tolist.end()); + } + + auto &succ = successors[from.pos]; + + DEBUG_PRINTF("connect %u -> %s\n", from.pos, + dumpPositions(tolist.begin(), tolist.end()).c_str()); + DEBUG_PRINTF("%u curr succ: %s\n", from.pos, + dumpPositions(begin(succ), end(succ)).c_str()); + + for (const auto &to : tolist) { + if (to.pos != POS_EPSILON) { + succ.insert(to); + } + } + + DEBUG_PRINTF("%u succ: %s\n", from.pos, + dumpPositions(begin(succ), end(succ)).c_str()); +} + +void GlushkovBuildStateImpl::addSuccessor(Position from, Position to) { + DEBUG_PRINTF("connect %u -> %u\n", from, to); + assert(from != POS_EPSILON && from != POS_UNINITIALIZED); + assert(to != POS_EPSILON && to != POS_UNINITIALIZED); + + auto &succ = successors[from]; + succ.insert(to); + + DEBUG_PRINTF("%u succ: %s\n", from, + dumpPositions(begin(succ), end(succ)).c_str()); +} + +void GlushkovBuildStateImpl::cloneFollowSet(Position first, Position last, + unsigned offset) { + assert(first <= last); + + // Clone vertex properties (reachability, etc) + builder.cloneRegion(first, last, offset); + + /* Clone the successors of all the positions between first and last + * inclusive, producing a new set of positions starting at (first + + * offset). */ + for (Position i = first; i <= last; i++) { + // This should be a new position. + assert(successors[i + offset].empty()); + + for (const PositionInfo &to : successors[i]) { + if (to.pos >= first && to.pos <= last) { + PositionInfo clone(to); + clone.pos += offset; + DEBUG_PRINTF("clone: %u -> %u\n", i + offset, clone.pos); + successors[i + offset].insert(clone); + } else { + // There shouldn't be any stray edges leading out of this + // region! + assert(0); + } + } + } +} + +void GlushkovBuildStateImpl::buildEdge(Position from, const PositionInfo &to) { + // Guard against embedded anchors + if (to == startState) { + /* can make it through the parse tree */ + throw ParseError("Embedded start anchors not supported."); + } + + assert(to.pos != POS_UNINITIALIZED); + assert(to.pos != POS_EPSILON); + + if (builder.hasEdge(from, to.pos)) { + return; + } + + builder.addEdge(from, to.pos); +} + +void GlushkovBuildStateImpl::buildEdges() { + // Create all the edges and track which vertices are asserts which need to + // be removed later. + for (const auto &m : successors) { + const Position from = m.first; + for (const auto &to : m.second) { + buildEdge(from, to); + } + } +} + +// Construct a usable GlushkovBuildState for the outside world. +unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b, + bool prefilter) { + return ue2::make_unique<GlushkovBuildStateImpl>(b, prefilter); +} + +// free functions for utility use + +/** \brief Eliminate lower-priority duplicate PositionInfo entries. + * + * Scans through a list of positions and retains only the highest priority + * version of a given (position, flags) entry. */ +void cleanupPositions(vector<PositionInfo> &a) { ue2_unordered_set<pair<Position, int>> seen; - - vector<PositionInfo> out; - out.reserve(a.size()); // output should be close to input in size. - - for (const auto &p : a) { - if (seen.emplace(p.pos, p.flags).second) { - out.push_back(p); // first encounter - } - } - - DEBUG_PRINTF("in %zu; out %zu\n", a.size(), out.size()); - a.swap(out); -} - -static -vector<PositionInfo>::iterator -replaceElemWithSequence(vector<PositionInfo> &dest, - vector<PositionInfo>::iterator &victim, - const vector<PositionInfo> &replacement) { - auto past = dest.erase(victim); - size_t d = distance(dest.begin(), past) + replacement.size(); - dest.insert(past, replacement.begin(), replacement.end()); - /* recalc past as iterator may have been invalidated */ - return dest.begin() + d; -} - -/** \brief Replace all epsilons with the given positions. - * - * Replace epsilons in a firsts list with another given firsts list. Note: the - * firsts lists must come from disjoint sets of components. If no epsilons are - * in the first firsts list the source is appended to the end. - */ -void replaceEpsilons(vector<PositionInfo> &target, - const vector<PositionInfo> &source) { - auto found = - find(target.begin(), target.end(), GlushkovBuildState::POS_EPSILON); - - if (found == target.end()) { - // no epsilons to replace, push on to the end - target.insert(target.end(), source.begin(), source.end()); - return; - } - - while (found != target.end()) { - checkEmbeddedEndAnchor(*found, source); - - // replace this epsilon with a copy of source with the same flags - vector<PositionInfo> newsource(source); - for (auto &pos : newsource) { - pos.flags |= found->flags; - } - - found = replaceElemWithSequence(target, found, newsource); - // find the next epsilon - found = find(found, target.end(), GlushkovBuildState::POS_EPSILON); - } - - cleanupPositions(target); -} - -#ifdef DUMP_SUPPORT - -void dump(ostream &os, const PositionInfo &p) { - if (p.pos == GlushkovBuildState::POS_EPSILON) { - os << "epsilon"; - } else { - os << p.pos; - } - - os << dumpCaptures(p); -} - -#endif // DUMP_SUPPORT - -} // namespace ue2 + + vector<PositionInfo> out; + out.reserve(a.size()); // output should be close to input in size. + + for (const auto &p : a) { + if (seen.emplace(p.pos, p.flags).second) { + out.push_back(p); // first encounter + } + } + + DEBUG_PRINTF("in %zu; out %zu\n", a.size(), out.size()); + a.swap(out); +} + +static +vector<PositionInfo>::iterator +replaceElemWithSequence(vector<PositionInfo> &dest, + vector<PositionInfo>::iterator &victim, + const vector<PositionInfo> &replacement) { + auto past = dest.erase(victim); + size_t d = distance(dest.begin(), past) + replacement.size(); + dest.insert(past, replacement.begin(), replacement.end()); + /* recalc past as iterator may have been invalidated */ + return dest.begin() + d; +} + +/** \brief Replace all epsilons with the given positions. + * + * Replace epsilons in a firsts list with another given firsts list. Note: the + * firsts lists must come from disjoint sets of components. If no epsilons are + * in the first firsts list the source is appended to the end. + */ +void replaceEpsilons(vector<PositionInfo> &target, + const vector<PositionInfo> &source) { + auto found = + find(target.begin(), target.end(), GlushkovBuildState::POS_EPSILON); + + if (found == target.end()) { + // no epsilons to replace, push on to the end + target.insert(target.end(), source.begin(), source.end()); + return; + } + + while (found != target.end()) { + checkEmbeddedEndAnchor(*found, source); + + // replace this epsilon with a copy of source with the same flags + vector<PositionInfo> newsource(source); + for (auto &pos : newsource) { + pos.flags |= found->flags; + } + + found = replaceElemWithSequence(target, found, newsource); + // find the next epsilon + found = find(found, target.end(), GlushkovBuildState::POS_EPSILON); + } + + cleanupPositions(target); +} + +#ifdef DUMP_SUPPORT + +void dump(ostream &os, const PositionInfo &p) { + if (p.pos == GlushkovBuildState::POS_EPSILON) { + os << "epsilon"; + } else { + os << p.pos; + } + + os << dumpCaptures(p); +} + +#endif // DUMP_SUPPORT + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/buildstate.h b/contrib/libs/hyperscan/src/parser/buildstate.h index 89d71f7160..5ddaf9b238 100644 --- a/contrib/libs/hyperscan/src/parser/buildstate.h +++ b/contrib/libs/hyperscan/src/parser/buildstate.h @@ -1,103 +1,103 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Glushkov construction. - */ - -#ifndef BUILDSTATE_H -#define BUILDSTATE_H - -#include "ue2common.h" -#include "position.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Glushkov construction. + */ + +#ifndef BUILDSTATE_H +#define BUILDSTATE_H + +#include "ue2common.h" +#include "position.h" #include "util/noncopyable.h" - -#include <memory> -#include <vector> - -namespace ue2 { - -class NFABuilder; -class PositionInfo; - -/** \brief Machinery for Glushkov construction. - * - * Abstract base class; use \ref makeGlushkovBuildState to get one of these you - * can use. */ + +#include <memory> +#include <vector> + +namespace ue2 { + +class NFABuilder; +class PositionInfo; + +/** \brief Machinery for Glushkov construction. + * + * Abstract base class; use \ref makeGlushkovBuildState to get one of these you + * can use. */ class GlushkovBuildState : noncopyable { -public: - /** \brief Represents an uninitialized state. */ - static const Position POS_UNINITIALIZED; - - /** \brief Represents an epsilon transition in the firsts of a component. */ - static const Position POS_EPSILON; - - virtual ~GlushkovBuildState(); - - /** \brief Returns a reference to the NFABuilder being used. */ - virtual NFABuilder &getBuilder() = 0; - - /** \brief Returns a const reference to the NFABuilder being used. */ - virtual const NFABuilder &getBuilder() const = 0; - - /** \brief Wire up edges from the lasts of one component to the firsts of - * another. */ - virtual void connectRegions(const std::vector<PositionInfo> &lasts, - const std::vector<PositionInfo> &firsts) = 0; - - /** \brief Wire the lasts of the main sequence to accepts. */ - virtual void connectAccepts(const std::vector<PositionInfo> &lasts) = 0; - - /** \brief Wire up a pair of positions. */ - virtual void addSuccessor(Position from, Position to) = 0; - - /** \brief Clone the vertex properties and edges of all vertices between - * two positions. */ - virtual void cloneFollowSet(Position from, Position to, u32 offset) = 0; - - /** \brief Build the prioritised list of edges out of our successor map. */ - virtual void buildEdges() = 0; -}; - -/** \brief Returns a new GlushkovBuildState object. */ -std::unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b, - bool prefilter); - -/** \brief Replace all epsilons with the given positions. */ -void replaceEpsilons(std::vector<PositionInfo> &target, - const std::vector<PositionInfo> &source); - -/** \brief Eliminate lower-priority duplicate PositionInfo entries. - * - * Scans through a list of positions and retains only the highest priority - * version of a given (position, flags) entry. */ -void cleanupPositions(std::vector<PositionInfo> &a); - -} // namespace ue2 - -#endif +public: + /** \brief Represents an uninitialized state. */ + static const Position POS_UNINITIALIZED; + + /** \brief Represents an epsilon transition in the firsts of a component. */ + static const Position POS_EPSILON; + + virtual ~GlushkovBuildState(); + + /** \brief Returns a reference to the NFABuilder being used. */ + virtual NFABuilder &getBuilder() = 0; + + /** \brief Returns a const reference to the NFABuilder being used. */ + virtual const NFABuilder &getBuilder() const = 0; + + /** \brief Wire up edges from the lasts of one component to the firsts of + * another. */ + virtual void connectRegions(const std::vector<PositionInfo> &lasts, + const std::vector<PositionInfo> &firsts) = 0; + + /** \brief Wire the lasts of the main sequence to accepts. */ + virtual void connectAccepts(const std::vector<PositionInfo> &lasts) = 0; + + /** \brief Wire up a pair of positions. */ + virtual void addSuccessor(Position from, Position to) = 0; + + /** \brief Clone the vertex properties and edges of all vertices between + * two positions. */ + virtual void cloneFollowSet(Position from, Position to, u32 offset) = 0; + + /** \brief Build the prioritised list of edges out of our successor map. */ + virtual void buildEdges() = 0; +}; + +/** \brief Returns a new GlushkovBuildState object. */ +std::unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b, + bool prefilter); + +/** \brief Replace all epsilons with the given positions. */ +void replaceEpsilons(std::vector<PositionInfo> &target, + const std::vector<PositionInfo> &source); + +/** \brief Eliminate lower-priority duplicate PositionInfo entries. + * + * Scans through a list of positions and retains only the highest priority + * version of a given (position, flags) entry. */ +void cleanupPositions(std::vector<PositionInfo> &a); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/check_refs.cpp b/contrib/libs/hyperscan/src/parser/check_refs.cpp index 6a99fb3291..60b5b6ba77 100644 --- a/contrib/libs/hyperscan/src/parser/check_refs.cpp +++ b/contrib/libs/hyperscan/src/parser/check_refs.cpp @@ -1,122 +1,122 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Component tree analysis that checks that references (such as - * back-refs, conditionals) have valid referents. - */ -#include "check_refs.h" -#include "ComponentBackReference.h" -#include "ComponentCondReference.h" -#include "ConstComponentVisitor.h" -#include "parse_error.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Component tree analysis that checks that references (such as + * back-refs, conditionals) have valid referents. + */ +#include "check_refs.h" +#include "ComponentBackReference.h" +#include "ComponentCondReference.h" +#include "ConstComponentVisitor.h" +#include "parse_error.h" +#include "util/container.h" #include "util/flat_containers.h" - -#include <sstream> - -using namespace std; - -namespace ue2 { - -/** - * \brief Visitor that checks the validity of references against a known list - * of indices and labels. - */ -class ReferenceVisitor: public DefaultConstComponentVisitor { -private: - const size_t num_ids; - const flat_set<string> &names; - -public: - ReferenceVisitor(size_t num_groups, const flat_set<string> &targets) - : num_ids(num_groups), names(targets) {} - - ~ReferenceVisitor() override; - - void invalid_index(const char *component, unsigned id) { - assert(component); - ostringstream str; - str << "Invalid " << component << " to expression " << id << "."; - throw ParseError(str.str()); - } - - void invalid_label(const char *component, const std::string &label) { - assert(component); - ostringstream str; - str << "Invalid " << component << " to label '" << label << "'."; - throw ParseError(str.str()); - } - + +#include <sstream> + +using namespace std; + +namespace ue2 { + +/** + * \brief Visitor that checks the validity of references against a known list + * of indices and labels. + */ +class ReferenceVisitor: public DefaultConstComponentVisitor { +private: + const size_t num_ids; + const flat_set<string> &names; + +public: + ReferenceVisitor(size_t num_groups, const flat_set<string> &targets) + : num_ids(num_groups), names(targets) {} + + ~ReferenceVisitor() override; + + void invalid_index(const char *component, unsigned id) { + assert(component); + ostringstream str; + str << "Invalid " << component << " to expression " << id << "."; + throw ParseError(str.str()); + } + + void invalid_label(const char *component, const std::string &label) { + assert(component); + ostringstream str; + str << "Invalid " << component << " to label '" << label << "'."; + throw ParseError(str.str()); + } + using DefaultConstComponentVisitor::pre; - void pre(const ComponentBackReference &c) override { - if (c.ref_id) { - if (c.ref_id >= num_ids) { - invalid_index("back reference", c.ref_id); - } - } else { - if (!contains(names, c.name)) { - invalid_label("back reference", c.name); - } - } - } - - void pre(const ComponentCondReference &c) override { - switch (c.kind) { - case ComponentCondReference::CONDITION_NUMBER: - if (c.ref_id >= num_ids) { - invalid_index("conditional reference", c.ref_id); - } - break; - case ComponentCondReference::CONDITION_NAME: - if (c.ref_name == "DEFINE") { - // The string "DEFINE" is a special "always false" condition - // used to define subroutines. - break; - } - if (!contains(names, c.ref_name)) { - invalid_label("conditional reference", c.ref_name); - } - break; - case ComponentCondReference::CONDITION_ASSERTION: - break; - } - } -}; - -// Out-of-line destructor to silence weak vtable warnings. -ReferenceVisitor::~ReferenceVisitor() {} - -void checkReferences(const Component &root, unsigned int groupIndices, + void pre(const ComponentBackReference &c) override { + if (c.ref_id) { + if (c.ref_id >= num_ids) { + invalid_index("back reference", c.ref_id); + } + } else { + if (!contains(names, c.name)) { + invalid_label("back reference", c.name); + } + } + } + + void pre(const ComponentCondReference &c) override { + switch (c.kind) { + case ComponentCondReference::CONDITION_NUMBER: + if (c.ref_id >= num_ids) { + invalid_index("conditional reference", c.ref_id); + } + break; + case ComponentCondReference::CONDITION_NAME: + if (c.ref_name == "DEFINE") { + // The string "DEFINE" is a special "always false" condition + // used to define subroutines. + break; + } + if (!contains(names, c.ref_name)) { + invalid_label("conditional reference", c.ref_name); + } + break; + case ComponentCondReference::CONDITION_ASSERTION: + break; + } + } +}; + +// Out-of-line destructor to silence weak vtable warnings. +ReferenceVisitor::~ReferenceVisitor() {} + +void checkReferences(const Component &root, unsigned int groupIndices, const flat_set<std::string> &groupNames) { - ReferenceVisitor vis(groupIndices, groupNames); - root.accept(vis); -} - -} // namespace ue2 + ReferenceVisitor vis(groupIndices, groupNames); + root.accept(vis); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/check_refs.h b/contrib/libs/hyperscan/src/parser/check_refs.h index c7958119b3..26912fb8e4 100644 --- a/contrib/libs/hyperscan/src/parser/check_refs.h +++ b/contrib/libs/hyperscan/src/parser/check_refs.h @@ -1,52 +1,52 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Component tree analysis that checks that references (such as - * back-refs, conditionals) have valid referents. - */ - + * \brief Component tree analysis that checks that references (such as + * back-refs, conditionals) have valid referents. + */ + #ifndef PARSER_CHECK_REFS_H #define PARSER_CHECK_REFS_H - + #include "util/flat_containers.h" -#include <string> - -namespace ue2 { - -class Component; -class ComponentSequence; - -void checkReferences(const Component &root, unsigned int groupIndices, +#include <string> + +namespace ue2 { + +class Component; +class ComponentSequence; + +void checkReferences(const Component &root, unsigned int groupIndices, const flat_set<std::string> &groupNames); - -} // namespace ue2 - + +} // namespace ue2 + #endif // PARSER_CHECK_REFS_H diff --git a/contrib/libs/hyperscan/src/parser/dump.h b/contrib/libs/hyperscan/src/parser/dump.h index 538a843e02..c920849272 100644 --- a/contrib/libs/hyperscan/src/parser/dump.h +++ b/contrib/libs/hyperscan/src/parser/dump.h @@ -1,48 +1,48 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef PARSER_DUMP_H_ -#define PARSER_DUMP_H_ - -#ifdef DUMP_SUPPORT - -#include <ostream> - -namespace ue2 { - -class Component; - -/** \brief Dump a text representation of the given component tree. Only - * available in DUMP_SUPPORT builds. */ -void dumpTree(std::ostream &os, const Component *const root); - -} // namespace ue2 - -#endif // DUMP_SUPPORT - -#endif // PARSER_DUMP_H_ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PARSER_DUMP_H_ +#define PARSER_DUMP_H_ + +#ifdef DUMP_SUPPORT + +#include <ostream> + +namespace ue2 { + +class Component; + +/** \brief Dump a text representation of the given component tree. Only + * available in DUMP_SUPPORT builds. */ +void dumpTree(std::ostream &os, const Component *const root); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif // PARSER_DUMP_H_ diff --git a/contrib/libs/hyperscan/src/parser/parse_error.cpp b/contrib/libs/hyperscan/src/parser/parse_error.cpp index 68725a586e..e7f60b2645 100644 --- a/contrib/libs/hyperscan/src/parser/parse_error.cpp +++ b/contrib/libs/hyperscan/src/parser/parse_error.cpp @@ -1,56 +1,56 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Parse/Compile exceptions. - */ - - -#include "parse_error.h" - -#include <sstream> - -namespace ue2 { - -// this is just to get these out of the .h to avoid weak vtables - -ParseError::~ParseError() {} - -LocatedParseError::~LocatedParseError() {} - -void LocatedParseError::locate(size_t offset) { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parse/Compile exceptions. + */ + + +#include "parse_error.h" + +#include <sstream> + +namespace ue2 { + +// this is just to get these out of the .h to avoid weak vtables + +ParseError::~ParseError() {} + +LocatedParseError::~LocatedParseError() {} + +void LocatedParseError::locate(size_t offset) { if (finalized) { return; } - std::ostringstream str; - str << reason << " at index " << offset << "."; - reason = str.str(); + std::ostringstream str; + str << reason << " at index " << offset << "."; + reason = str.str(); finalized = true; -} - -} +} + +} diff --git a/contrib/libs/hyperscan/src/parser/parse_error.h b/contrib/libs/hyperscan/src/parser/parse_error.h index 07169a6dc3..4556ed5e04 100644 --- a/contrib/libs/hyperscan/src/parser/parse_error.h +++ b/contrib/libs/hyperscan/src/parser/parse_error.h @@ -1,67 +1,67 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Parse/Compile exceptions. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Parse/Compile exceptions. + */ + #ifndef PARSE_ERROR_H #define PARSE_ERROR_H - -#include "util/compile_error.h" - -#include <string> - -namespace ue2 { - -/** \brief Error thrown internally by the Parser interface. */ -class ParseError : public CompileError { -public: - // Note: 'why' should describe why the error occurred and end with a - // full stop, but no line break. + +#include "util/compile_error.h" + +#include <string> + +namespace ue2 { + +/** \brief Error thrown internally by the Parser interface. */ +class ParseError : public CompileError { +public: + // Note: 'why' should describe why the error occurred and end with a + // full stop, but no line break. explicit ParseError(std::string why) : CompileError(std::move(why)) {} - - ~ParseError() override; -}; - -class LocatedParseError : public ParseError { -public: + + ~ParseError() override; +}; + +class LocatedParseError : public ParseError { +public: explicit LocatedParseError(std::string why) : ParseError(".") { reason = std::move(why); // don't use ParseError ctor - } - - ~LocatedParseError() override; - - void locate(size_t offset); + } + + ~LocatedParseError() override; + + void locate(size_t offset); private: bool finalized = false; //!< true when locate() has been called. -}; - -} // namespace ue2 - +}; + +} // namespace ue2 + #endif /* PARSE_ERROR_H */ diff --git a/contrib/libs/hyperscan/src/parser/parser_util.cpp b/contrib/libs/hyperscan/src/parser/parser_util.cpp index 214b361af7..a064b9743b 100644 --- a/contrib/libs/hyperscan/src/parser/parser_util.cpp +++ b/contrib/libs/hyperscan/src/parser/parser_util.cpp @@ -1,48 +1,48 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Utilities (currently just ParseMode constructor) - */ - - -#include "hs.h" -#include "Parser.h" -#include "ue2common.h" - -namespace ue2 { - -ParseMode::ParseMode(u32 hs_flags) : - caseless(hs_flags & HS_FLAG_CASELESS), - dotall(hs_flags & HS_FLAG_DOTALL), - ignore_space(false), - multiline(hs_flags & HS_FLAG_MULTILINE), - ucp(hs_flags & HS_FLAG_UCP), - utf8(hs_flags & HS_FLAG_UTF8) {} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Utilities (currently just ParseMode constructor) + */ + + +#include "hs.h" +#include "Parser.h" +#include "ue2common.h" + +namespace ue2 { + +ParseMode::ParseMode(u32 hs_flags) : + caseless(hs_flags & HS_FLAG_CASELESS), + dotall(hs_flags & HS_FLAG_DOTALL), + ignore_space(false), + multiline(hs_flags & HS_FLAG_MULTILINE), + ucp(hs_flags & HS_FLAG_UCP), + utf8(hs_flags & HS_FLAG_UTF8) {} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/position.h b/contrib/libs/hyperscan/src/parser/position.h index 1913e88e65..184cdb291e 100644 --- a/contrib/libs/hyperscan/src/parser/position.h +++ b/contrib/libs/hyperscan/src/parser/position.h @@ -1,107 +1,107 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Per-position flags used during Glushkov construction, PositionInfo class. - */ - -#ifndef PARSER_POSITION_H -#define PARSER_POSITION_H - -#include "ue2common.h" - -#include <set> - -namespace ue2 { - -#define POS_FLAG_NOFLOAT (1 << 0) //!< don't wire to start-dotstar -#define POS_FLAG_MUST_FLOAT (1 << 1) //!< don't wire solely to start -#define POS_FLAG_FIDDLE_ACCEPT (1 << 2) //!< add a dot with an offset adjustment when wiring to accept -#define POS_FLAG_ASSERT_WORD_TO_NONWORD (1 << 3) //!< epsilon for word to nonword transition -#define POS_FLAG_ASSERT_NONWORD_TO_WORD (1 << 4) //!< epsilon for nonword to word transition -#define POS_FLAG_ASSERT_WORD_TO_WORD (1 << 5) //!< epsilon for word to word transition -#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD (1 << 6) //!< epsilon for nonword to nonword transition - -/** vertex created by cloning startDs, not considered part of the match. - * mirrors POS_FLAG_FIDDLE_ACCEPT */ -#define POS_FLAG_VIRTUAL_START (1 << 7) - -/** multi-line ^ does not match \\n at end of buffer. As a result, we must never - * wire the \\n from ^ to eod */ -#define POS_FLAG_MULTILINE_START (1 << 8) - -#define POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP (1 << 9) -#define POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP (1 << 10) -#define POS_FLAG_ASSERT_WORD_TO_WORD_UCP (1 << 11) -#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP (1 << 12) - -#define POS_FLAG_ASSERT_NONWORD_TO_ANY (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \ - | POS_FLAG_ASSERT_NONWORD_TO_WORD) -#define POS_FLAG_ASSERT_WORD_TO_ANY (POS_FLAG_ASSERT_WORD_TO_NONWORD \ - | POS_FLAG_ASSERT_WORD_TO_WORD) - -#define POS_FLAG_ASSERT_ANY_TO_NONWORD (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \ - | POS_FLAG_ASSERT_WORD_TO_NONWORD) -#define POS_FLAG_ASSERT_ANY_TO_WORD (POS_FLAG_ASSERT_NONWORD_TO_WORD \ - | POS_FLAG_ASSERT_WORD_TO_WORD) - -#define POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP \ - (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \ - | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP) -#define POS_FLAG_ASSERT_WORD_TO_ANY_UCP (POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP \ - | POS_FLAG_ASSERT_WORD_TO_WORD_UCP) - -#define POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP \ - (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \ - | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP) -#define POS_FLAG_ASSERT_ANY_TO_WORD_UCP (POS_FLAG_ASSERT_WORD_TO_WORD_UCP \ - | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP) - -#define UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY_UCP \ - | POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP) - -#define NON_UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY \ - | POS_FLAG_ASSERT_NONWORD_TO_ANY) - -/** do not wire to accept or other pos; may still wire to eod, etc if - * instructed */ -#define POS_FLAG_ONLY_ENDS (1 << 23) - -#define POS_FLAG_WIRE_EOD (1 << 24) /**< wire to accept eod */ -#define POS_FLAG_WIRE_NL_EOD (1 << 25) /**< wire to nl before accept eod */ -#define POS_FLAG_WIRE_NL_ACCEPT (1 << 26) /**< wire to nl before accept */ -#define POS_FLAG_NO_NL_EOD (1 << 27) /**< disallow nl before accept eod */ -#define POS_FLAG_NO_NL_ACCEPT (1 << 28) /**< disallow nl before accept */ - -/** \brief Parse and Glushkov construction use only. State number within the - * NFA as it is being constructed. */ -typedef u32 Position; - -} // namespace ue2 - -#endif // PARSER_POSITION_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Per-position flags used during Glushkov construction, PositionInfo class. + */ + +#ifndef PARSER_POSITION_H +#define PARSER_POSITION_H + +#include "ue2common.h" + +#include <set> + +namespace ue2 { + +#define POS_FLAG_NOFLOAT (1 << 0) //!< don't wire to start-dotstar +#define POS_FLAG_MUST_FLOAT (1 << 1) //!< don't wire solely to start +#define POS_FLAG_FIDDLE_ACCEPT (1 << 2) //!< add a dot with an offset adjustment when wiring to accept +#define POS_FLAG_ASSERT_WORD_TO_NONWORD (1 << 3) //!< epsilon for word to nonword transition +#define POS_FLAG_ASSERT_NONWORD_TO_WORD (1 << 4) //!< epsilon for nonword to word transition +#define POS_FLAG_ASSERT_WORD_TO_WORD (1 << 5) //!< epsilon for word to word transition +#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD (1 << 6) //!< epsilon for nonword to nonword transition + +/** vertex created by cloning startDs, not considered part of the match. + * mirrors POS_FLAG_FIDDLE_ACCEPT */ +#define POS_FLAG_VIRTUAL_START (1 << 7) + +/** multi-line ^ does not match \\n at end of buffer. As a result, we must never + * wire the \\n from ^ to eod */ +#define POS_FLAG_MULTILINE_START (1 << 8) + +#define POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP (1 << 9) +#define POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP (1 << 10) +#define POS_FLAG_ASSERT_WORD_TO_WORD_UCP (1 << 11) +#define POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP (1 << 12) + +#define POS_FLAG_ASSERT_NONWORD_TO_ANY (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \ + | POS_FLAG_ASSERT_NONWORD_TO_WORD) +#define POS_FLAG_ASSERT_WORD_TO_ANY (POS_FLAG_ASSERT_WORD_TO_NONWORD \ + | POS_FLAG_ASSERT_WORD_TO_WORD) + +#define POS_FLAG_ASSERT_ANY_TO_NONWORD (POS_FLAG_ASSERT_NONWORD_TO_NONWORD \ + | POS_FLAG_ASSERT_WORD_TO_NONWORD) +#define POS_FLAG_ASSERT_ANY_TO_WORD (POS_FLAG_ASSERT_NONWORD_TO_WORD \ + | POS_FLAG_ASSERT_WORD_TO_WORD) + +#define POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP \ + (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \ + | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP) +#define POS_FLAG_ASSERT_WORD_TO_ANY_UCP (POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP \ + | POS_FLAG_ASSERT_WORD_TO_WORD_UCP) + +#define POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP \ + (POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP \ + | POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP) +#define POS_FLAG_ASSERT_ANY_TO_WORD_UCP (POS_FLAG_ASSERT_WORD_TO_WORD_UCP \ + | POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP) + +#define UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY_UCP \ + | POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP) + +#define NON_UCP_ASSERT_FLAGS (POS_FLAG_ASSERT_WORD_TO_ANY \ + | POS_FLAG_ASSERT_NONWORD_TO_ANY) + +/** do not wire to accept or other pos; may still wire to eod, etc if + * instructed */ +#define POS_FLAG_ONLY_ENDS (1 << 23) + +#define POS_FLAG_WIRE_EOD (1 << 24) /**< wire to accept eod */ +#define POS_FLAG_WIRE_NL_EOD (1 << 25) /**< wire to nl before accept eod */ +#define POS_FLAG_WIRE_NL_ACCEPT (1 << 26) /**< wire to nl before accept */ +#define POS_FLAG_NO_NL_EOD (1 << 27) /**< disallow nl before accept eod */ +#define POS_FLAG_NO_NL_ACCEPT (1 << 28) /**< disallow nl before accept */ + +/** \brief Parse and Glushkov construction use only. State number within the + * NFA as it is being constructed. */ +typedef u32 Position; + +} // namespace ue2 + +#endif // PARSER_POSITION_H diff --git a/contrib/libs/hyperscan/src/parser/position_dump.h b/contrib/libs/hyperscan/src/parser/position_dump.h index 12e5c896e4..e4afbf415b 100644 --- a/contrib/libs/hyperscan/src/parser/position_dump.h +++ b/contrib/libs/hyperscan/src/parser/position_dump.h @@ -1,63 +1,63 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef POSITION_DUMP_H -#define POSITION_DUMP_H - -#include <sstream> - -namespace ue2 { - -#ifdef DUMP_SUPPORT -// implemented in buildstate.cpp -void dump(std::ostream &os, const PositionInfo &p); -#endif - -#if defined(DUMP_SUPPORT) || defined(DEBUG) - -template<class Iterator> -static UNUSED -std::string dumpPositions(const Iterator &begin, const Iterator &end) { - std::ostringstream oss; - oss << '['; - for (Iterator i = begin; i != end; ++i) { - if (i != begin) { - oss << ' '; - } - dump(oss, *i); - } - oss << ']'; - return oss.str(); -} - -#endif - -} // namespace ue2 - -#endif /* POSITION_DUMP_H */ - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef POSITION_DUMP_H +#define POSITION_DUMP_H + +#include <sstream> + +namespace ue2 { + +#ifdef DUMP_SUPPORT +// implemented in buildstate.cpp +void dump(std::ostream &os, const PositionInfo &p); +#endif + +#if defined(DUMP_SUPPORT) || defined(DEBUG) + +template<class Iterator> +static UNUSED +std::string dumpPositions(const Iterator &begin, const Iterator &end) { + std::ostringstream oss; + oss << '['; + for (Iterator i = begin; i != end; ++i) { + if (i != begin) { + oss << ' '; + } + dump(oss, *i); + } + oss << ']'; + return oss.str(); +} + +#endif + +} // namespace ue2 + +#endif /* POSITION_DUMP_H */ + diff --git a/contrib/libs/hyperscan/src/parser/position_info.h b/contrib/libs/hyperscan/src/parser/position_info.h index 2a1f078473..4e64a28029 100644 --- a/contrib/libs/hyperscan/src/parser/position_info.h +++ b/contrib/libs/hyperscan/src/parser/position_info.h @@ -1,57 +1,57 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef POSITION_INFO_H -#define POSITION_INFO_H - -#include "ue2common.h" -#include "position.h" - -namespace ue2 { - -/** Class representing a component state. */ -class PositionInfo { -public: - PositionInfo(unsigned int p) : pos(p), flags(0) {} - - bool operator<(const PositionInfo &other) const { - return pos < other.pos; - } - - bool operator==(const PositionInfo &other) const { - return pos == other.pos; - } - - Position pos; //!< state number - int flags; //!< from POS_FLAG_* above -}; - -} // namespace ue2 - -#endif /* POSITION_INFO_H */ - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef POSITION_INFO_H +#define POSITION_INFO_H + +#include "ue2common.h" +#include "position.h" + +namespace ue2 { + +/** Class representing a component state. */ +class PositionInfo { +public: + PositionInfo(unsigned int p) : pos(p), flags(0) {} + + bool operator<(const PositionInfo &other) const { + return pos < other.pos; + } + + bool operator==(const PositionInfo &other) const { + return pos == other.pos; + } + + Position pos; //!< state number + int flags; //!< from POS_FLAG_* above +}; + +} // namespace ue2 + +#endif /* POSITION_INFO_H */ + diff --git a/contrib/libs/hyperscan/src/parser/prefilter.cpp b/contrib/libs/hyperscan/src/parser/prefilter.cpp index 94e73ccfd5..f69362e4e3 100644 --- a/contrib/libs/hyperscan/src/parser/prefilter.cpp +++ b/contrib/libs/hyperscan/src/parser/prefilter.cpp @@ -1,300 +1,300 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Prefiltering component tree transformation. - */ -#include "ComponentAssertion.h" -#include "ComponentAtomicGroup.h" -#include "ComponentBackReference.h" -#include "ComponentBoundary.h" -#include "ComponentClass.h" -#include "ComponentCondReference.h" -#include "ComponentRepeat.h" -#include "ComponentSequence.h" -#include "ComponentVisitor.h" -#include "ComponentWordBoundary.h" -#include "ConstComponentVisitor.h" -#include "Parser.h" -#include "prefilter.h" - -#include <algorithm> -#include <stack> - -using namespace std; - -namespace ue2 { - -/** \brief Max number of positions a referent can have to be considered safe to - * replace a reference in prefiltering mode. */ -static const size_t MAX_REFERENT_POSITIONS = 1; - -/** \brief Constructs a \ref ComponentClass that matches a dot (any - * byte/codepoint, depending on whether UTF-8). */ -static -unique_ptr<ComponentClass> makeDotClass(const ParseMode &mode_in) { - ParseMode mode(mode_in); - mode.dotall = true; - return generateComponent(CLASS_ANY, false, mode); -} - -namespace { - -/** - * \brief Visitor used to determine if a given referent component is safe to - * replace its reference in prefiltering mode. Throws - * SafeReferentVisitor::Unsafe to terminate early on unsafe cases. */ -class SafeReferentVisitor : public DefaultConstComponentVisitor { -public: - struct Unsafe {}; - - SafeReferentVisitor() : numPositions(0) {} - - bool is_safe() const { - DEBUG_PRINTF("numPositions = %zu\n", numPositions); - return numPositions <= MAX_REFERENT_POSITIONS; - } - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Prefiltering component tree transformation. + */ +#include "ComponentAssertion.h" +#include "ComponentAtomicGroup.h" +#include "ComponentBackReference.h" +#include "ComponentBoundary.h" +#include "ComponentClass.h" +#include "ComponentCondReference.h" +#include "ComponentRepeat.h" +#include "ComponentSequence.h" +#include "ComponentVisitor.h" +#include "ComponentWordBoundary.h" +#include "ConstComponentVisitor.h" +#include "Parser.h" +#include "prefilter.h" + +#include <algorithm> +#include <stack> + +using namespace std; + +namespace ue2 { + +/** \brief Max number of positions a referent can have to be considered safe to + * replace a reference in prefiltering mode. */ +static const size_t MAX_REFERENT_POSITIONS = 1; + +/** \brief Constructs a \ref ComponentClass that matches a dot (any + * byte/codepoint, depending on whether UTF-8). */ +static +unique_ptr<ComponentClass> makeDotClass(const ParseMode &mode_in) { + ParseMode mode(mode_in); + mode.dotall = true; + return generateComponent(CLASS_ANY, false, mode); +} + +namespace { + +/** + * \brief Visitor used to determine if a given referent component is safe to + * replace its reference in prefiltering mode. Throws + * SafeReferentVisitor::Unsafe to terminate early on unsafe cases. */ +class SafeReferentVisitor : public DefaultConstComponentVisitor { +public: + struct Unsafe {}; + + SafeReferentVisitor() : numPositions(0) {} + + bool is_safe() const { + DEBUG_PRINTF("numPositions = %zu\n", numPositions); + return numPositions <= MAX_REFERENT_POSITIONS; + } + using DefaultConstComponentVisitor::pre; using DefaultConstComponentVisitor::post; - void pre(const AsciiComponentClass &) override { - numPositions++; - } - - void pre(const UTF8ComponentClass &) override { - // FIXME: we should be able to tell precisely how many positions this - // class will use. Right now, use the worst case. - numPositions += 4; - } - - void pre(const ComponentBoundary &) override { - numPositions++; - } - - void pre(const ComponentByte &) override { - numPositions++; - } - - void pre(const ComponentEUS &) override { - numPositions++; - } - - void pre(const ComponentRepeat &) override { - // Record the number of positions used before we visit the contents of - // the repeat. - countStack.push(numPositions); - } - - void post(const ComponentRepeat &c) override { - assert(!countStack.empty()); - size_t before = countStack.top(); - countStack.pop(); - assert(before <= numPositions); - - std::pair<u32, u32> bounds = c.getBounds(); - size_t subPositions = numPositions - before; - size_t copies = bounds.second < ComponentRepeat::NoLimit - ? bounds.second - : max(bounds.first, 1U); - numPositions = before + (subPositions * copies); - } - - void pre(const ComponentWordBoundary &) override { - // not quite accurate, as these are expanded out in assert - // resolution... - numPositions++; - } - - void pre(const ComponentBackReference &) override { - throw Unsafe(); - } - - void pre(const ComponentCondReference &) override { - throw Unsafe(); - } - -private: - size_t numPositions; - - // For temporary use - std::stack<size_t> countStack; -}; - -static -bool isSafeReferent(const Component &c) { - try { - SafeReferentVisitor vis; - c.accept(vis); - return vis.is_safe(); - } - catch (const SafeReferentVisitor::Unsafe &) { - return false; - } -} - -/** - * \brief Visitor to find the \ref ComponentSequence with a given reference ID - * or name: if found, the visitor will throw a const ptr to it. - */ -class FindSequenceVisitor : public DefaultConstComponentVisitor { -public: - explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {} - explicit FindSequenceVisitor(const std::string &s) : name(s) {} - + void pre(const AsciiComponentClass &) override { + numPositions++; + } + + void pre(const UTF8ComponentClass &) override { + // FIXME: we should be able to tell precisely how many positions this + // class will use. Right now, use the worst case. + numPositions += 4; + } + + void pre(const ComponentBoundary &) override { + numPositions++; + } + + void pre(const ComponentByte &) override { + numPositions++; + } + + void pre(const ComponentEUS &) override { + numPositions++; + } + + void pre(const ComponentRepeat &) override { + // Record the number of positions used before we visit the contents of + // the repeat. + countStack.push(numPositions); + } + + void post(const ComponentRepeat &c) override { + assert(!countStack.empty()); + size_t before = countStack.top(); + countStack.pop(); + assert(before <= numPositions); + + std::pair<u32, u32> bounds = c.getBounds(); + size_t subPositions = numPositions - before; + size_t copies = bounds.second < ComponentRepeat::NoLimit + ? bounds.second + : max(bounds.first, 1U); + numPositions = before + (subPositions * copies); + } + + void pre(const ComponentWordBoundary &) override { + // not quite accurate, as these are expanded out in assert + // resolution... + numPositions++; + } + + void pre(const ComponentBackReference &) override { + throw Unsafe(); + } + + void pre(const ComponentCondReference &) override { + throw Unsafe(); + } + +private: + size_t numPositions; + + // For temporary use + std::stack<size_t> countStack; +}; + +static +bool isSafeReferent(const Component &c) { + try { + SafeReferentVisitor vis; + c.accept(vis); + return vis.is_safe(); + } + catch (const SafeReferentVisitor::Unsafe &) { + return false; + } +} + +/** + * \brief Visitor to find the \ref ComponentSequence with a given reference ID + * or name: if found, the visitor will throw a const ptr to it. + */ +class FindSequenceVisitor : public DefaultConstComponentVisitor { +public: + explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {} + explicit FindSequenceVisitor(const std::string &s) : name(s) {} + using DefaultConstComponentVisitor::pre; - void pre(const ComponentSequence &c) override { - if (!name.empty()) { - if (c.getCaptureName() == name) { - throw &c; - } - } else if (c.getCaptureIndex() == id) { - throw &c; - } - } -private: - const std::string name; - const unsigned id = 0; -}; - -static -const ComponentSequence *findCapturingGroup(const Component *root, - FindSequenceVisitor &vis) { - try { - root->accept(vis); - DEBUG_PRINTF("group not found\n"); - return nullptr; - } catch (const ComponentSequence *seq) { - return seq; - } -} - -} // namespace - -/** - * \brief Visitor to apply prefilter reductions, swapping components for which - * we don't have real implementations with implementable ones. Any such - * replacement should produce a superset of the matches that would be produced - * by the original. - */ -class PrefilterVisitor : public DefaultComponentVisitor { -public: - PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} - ~PrefilterVisitor() override; - + void pre(const ComponentSequence &c) override { + if (!name.empty()) { + if (c.getCaptureName() == name) { + throw &c; + } + } else if (c.getCaptureIndex() == id) { + throw &c; + } + } +private: + const std::string name; + const unsigned id = 0; +}; + +static +const ComponentSequence *findCapturingGroup(const Component *root, + FindSequenceVisitor &vis) { + try { + root->accept(vis); + DEBUG_PRINTF("group not found\n"); + return nullptr; + } catch (const ComponentSequence *seq) { + return seq; + } +} + +} // namespace + +/** + * \brief Visitor to apply prefilter reductions, swapping components for which + * we don't have real implementations with implementable ones. Any such + * replacement should produce a superset of the matches that would be produced + * by the original. + */ +class PrefilterVisitor : public DefaultComponentVisitor { +public: + PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} + ~PrefilterVisitor() override; + using DefaultComponentVisitor::visit; - /** \brief Calls the visitor (recursively) on a new replacement component - * we've just created. Takes care of freeing it if the sequence is itself - * replaced. */ - template<class T> - Component *visit_replacement(T *r) { - Component *c = r->accept(*this); - if (c != r) { - delete r; - } - return c; - } - - Component *visit(ComponentBackReference *c) override { - assert(c); - - // If the referent is simple (represents a single position), then we - // replace the back-reference with a copy of it. - const ComponentSequence *ref = nullptr; - const std::string &ref_name = c->getRefName(); - const unsigned ref_id = c->getRefID(); - if (!ref_name.empty()) { - FindSequenceVisitor vis(ref_name); - ref = findCapturingGroup(root, vis); - } else if (ref_id > 0) { - FindSequenceVisitor vis(ref_id); - ref = findCapturingGroup(root, vis); - } - - if (ref && isSafeReferent(*ref)) { - DEBUG_PRINTF("found safe ref %p\n", ref); - ComponentSequence *seq = ref->clone(); - // Remove labels from cloned sequence. - seq->setCaptureName(""); - seq->setCaptureIndex(ComponentSequence::NOT_CAPTURED); - - return visit_replacement(seq); - } - - // Replace with ".*". - auto rep = makeComponentRepeat(makeDotClass(mode), 0, - ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_GREEDY); - return rep.release(); // FIXME: owning raw ptr - } - - Component *visit(UNUSED ComponentAssertion *c) override { - assert(c); - // Replace with an empty sequence. - return new ComponentSequence(); - } - - Component *visit(ComponentRepeat *c) override { - assert(c); - // Possessive repeats become greedy. - if (c->type == ComponentRepeat::REPEAT_POSSESSIVE) { - c->type = ComponentRepeat::REPEAT_GREEDY; - } - return c; - } - - Component *visit(ComponentAtomicGroup *c) override { - assert(c); - // Replace with a plain sequence containing the atomic group's - // children. - ComponentSequence *seq = new ComponentSequence(); - const auto &children = c->getChildren(); - for (const auto &child : children) { - assert(child); - seq->addComponent(unique_ptr<Component>(child->clone())); - } - - return visit_replacement(seq); - } - - Component *visit(UNUSED ComponentEUS *c) override { - assert(c); - // Replace with ".+". - auto rep = makeComponentRepeat(makeDotClass(mode), 1, - ComponentRepeat::NoLimit, - ComponentRepeat::REPEAT_GREEDY); - return rep.release(); // FIXME: owning raw ptr - } - - Component *visit(ComponentWordBoundary *c) override { - assert(c); + /** \brief Calls the visitor (recursively) on a new replacement component + * we've just created. Takes care of freeing it if the sequence is itself + * replaced. */ + template<class T> + Component *visit_replacement(T *r) { + Component *c = r->accept(*this); + if (c != r) { + delete r; + } + return c; + } + + Component *visit(ComponentBackReference *c) override { + assert(c); + + // If the referent is simple (represents a single position), then we + // replace the back-reference with a copy of it. + const ComponentSequence *ref = nullptr; + const std::string &ref_name = c->getRefName(); + const unsigned ref_id = c->getRefID(); + if (!ref_name.empty()) { + FindSequenceVisitor vis(ref_name); + ref = findCapturingGroup(root, vis); + } else if (ref_id > 0) { + FindSequenceVisitor vis(ref_id); + ref = findCapturingGroup(root, vis); + } + + if (ref && isSafeReferent(*ref)) { + DEBUG_PRINTF("found safe ref %p\n", ref); + ComponentSequence *seq = ref->clone(); + // Remove labels from cloned sequence. + seq->setCaptureName(""); + seq->setCaptureIndex(ComponentSequence::NOT_CAPTURED); + + return visit_replacement(seq); + } + + // Replace with ".*". + auto rep = makeComponentRepeat(makeDotClass(mode), 0, + ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY); + return rep.release(); // FIXME: owning raw ptr + } + + Component *visit(UNUSED ComponentAssertion *c) override { + assert(c); + // Replace with an empty sequence. + return new ComponentSequence(); + } + + Component *visit(ComponentRepeat *c) override { + assert(c); + // Possessive repeats become greedy. + if (c->type == ComponentRepeat::REPEAT_POSSESSIVE) { + c->type = ComponentRepeat::REPEAT_GREEDY; + } + return c; + } + + Component *visit(ComponentAtomicGroup *c) override { + assert(c); + // Replace with a plain sequence containing the atomic group's + // children. + ComponentSequence *seq = new ComponentSequence(); + const auto &children = c->getChildren(); + for (const auto &child : children) { + assert(child); + seq->addComponent(unique_ptr<Component>(child->clone())); + } + + return visit_replacement(seq); + } + + Component *visit(UNUSED ComponentEUS *c) override { + assert(c); + // Replace with ".+". + auto rep = makeComponentRepeat(makeDotClass(mode), 1, + ComponentRepeat::NoLimit, + ComponentRepeat::REPEAT_GREEDY); + return rep.release(); // FIXME: owning raw ptr + } + + Component *visit(ComponentWordBoundary *c) override { + assert(c); // TODO: Right now, we do not have correct code for resolving these // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we @@ -305,52 +305,52 @@ public: } // All other cases can be prefiltered. - c->setPrefilter(true); - return c; - } - - Component *visit(ComponentCondReference *c) override { - assert(c); - // Replace with a plain sequence containing the conditional reference's - // children. - ComponentSequence *seq = new ComponentSequence(); - const auto &children = c->getChildren(); - - // Empty children is accepted by PCRE as a "do nothing" case. - if (children.empty()) { - return seq; - } - - for (const auto &child : children) { - assert(child); - seq->addComponent(unique_ptr<Component>(child->clone())); - } - - // If the conditional reference had just a YES branch, we want this to - // be an alternation with an empty sequence (the NO branch). - if (!c->hasBothBranches) { - seq->addAlternation(); - seq->finalize(); - } - - return visit_replacement(seq); - } - -private: - Component *root; - const ParseMode &mode; -}; - -PrefilterVisitor::~PrefilterVisitor() {} - -void prefilterTree(unique_ptr<Component> &root, const ParseMode &mode) { - assert(root); - PrefilterVisitor vis(root.get(), mode); - - Component *c = root->accept(vis); - if (c != root.get()) { - root.reset(c); - } -} - -} // namespace ue2 + c->setPrefilter(true); + return c; + } + + Component *visit(ComponentCondReference *c) override { + assert(c); + // Replace with a plain sequence containing the conditional reference's + // children. + ComponentSequence *seq = new ComponentSequence(); + const auto &children = c->getChildren(); + + // Empty children is accepted by PCRE as a "do nothing" case. + if (children.empty()) { + return seq; + } + + for (const auto &child : children) { + assert(child); + seq->addComponent(unique_ptr<Component>(child->clone())); + } + + // If the conditional reference had just a YES branch, we want this to + // be an alternation with an empty sequence (the NO branch). + if (!c->hasBothBranches) { + seq->addAlternation(); + seq->finalize(); + } + + return visit_replacement(seq); + } + +private: + Component *root; + const ParseMode &mode; +}; + +PrefilterVisitor::~PrefilterVisitor() {} + +void prefilterTree(unique_ptr<Component> &root, const ParseMode &mode) { + assert(root); + PrefilterVisitor vis(root.get(), mode); + + Component *c = root->accept(vis); + if (c != root.get()) { + root.reset(c); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/prefilter.h b/contrib/libs/hyperscan/src/parser/prefilter.h index d7d61fc272..c2665f4096 100644 --- a/contrib/libs/hyperscan/src/parser/prefilter.h +++ b/contrib/libs/hyperscan/src/parser/prefilter.h @@ -1,48 +1,48 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef PARSER_PREFILTER_H -#define PARSER_PREFILTER_H - -#include <memory> - -namespace ue2 { - -class Component; -struct ParseMode; - -/** - * \brief Applies prefiltering transformations to the given component. - * - * May reseat the given Component pointer. - */ -void prefilterTree(std::unique_ptr<Component> &root, const ParseMode &mode); - -} // namespace ue2 - -#endif // PARSER_PREFILTER_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PARSER_PREFILTER_H +#define PARSER_PREFILTER_H + +#include <memory> + +namespace ue2 { + +class Component; +struct ParseMode; + +/** + * \brief Applies prefiltering transformations to the given component. + * + * May reseat the given Component pointer. + */ +void prefilterTree(std::unique_ptr<Component> &root, const ParseMode &mode); + +} // namespace ue2 + +#endif // PARSER_PREFILTER_H diff --git a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp index 0f0a1663e2..a5d67f30d8 100644 --- a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp +++ b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp @@ -1,205 +1,205 @@ -/* +/* * Copyright (c) 2015-2019, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shortcut literal pass: directly add literal components to Rose. - */ -#include "AsciiComponentClass.h" -#include "Utf8ComponentClass.h" -#include "ComponentAssertion.h" -#include "ComponentAtomicGroup.h" -#include "ComponentBackReference.h" -#include "ComponentBoundary.h" -#include "ComponentClass.h" -#include "ComponentCondReference.h" -#include "ComponentRepeat.h" -#include "ComponentSequence.h" -#include "ComponentVisitor.h" -#include "ComponentWordBoundary.h" -#include "ConstComponentVisitor.h" -#include "parse_error.h" -#include "shortcut_literal.h" -#include "grey.h" -#include "nfagraph/ng.h" -#include "compiler/compiler.h" -#include "util/ue2string.h" -#include "ue2common.h" - -#include <stack> - -using namespace std; - -namespace ue2 { - -/** - * \brief Visitor that constructs a ue2_literal from a component tree. - * - * If a component that can't be part of a literal is encountered, this visitor - * will throw ConstructLiteralVisitor::NotLiteral. - */ -class ConstructLiteralVisitor : public ConstComponentVisitor { -public: - ~ConstructLiteralVisitor() override; - - /** \brief Thrown if this component does not represent a literal. */ - struct NotLiteral {}; - - void pre(const AsciiComponentClass &c) override { - const CharReach &cr = c.cr; - const size_t width = cr.count(); - if (width == 1) { - lit.push_back(cr.find_first(), false); - } else if (width == 2 && cr.isCaselessChar()) { - lit.push_back(cr.find_first(), true); - } else { - throw NotLiteral(); - } - } - - void pre(const ComponentRepeat &c) override { - if (c.m_min == 0 || c.m_min != c.m_max) { - throw NotLiteral(); - } - - if (c.m_max < ComponentRepeat::NoLimit && c.m_max > 32767) { - throw ParseError("Bounded repeat is too large."); - } - - // Store the current length of the literal; in this repeat's post() - // call we will append N-1 more copies of [index..end]. - repeat_stack.push(lit.length()); - } - - void post(const ComponentRepeat &c) override { - // Add N-1 copies of the string between the entry to the repeat and the - // current end of the literal. - assert(!repeat_stack.empty()); - const ue2_literal suffix = lit.substr(repeat_stack.top()); - repeat_stack.pop(); - - for (unsigned i = 1; i < c.m_min; i++) { - lit += suffix; - } - } - - void pre(const ComponentSequence &) override { - // Pass through. - } - - void pre(const ComponentAlternation &) override { throw NotLiteral(); } - void pre(const ComponentAssertion &) override { throw NotLiteral(); } - void pre(const ComponentAtomicGroup &) override { throw NotLiteral(); } - void pre(const ComponentBackReference &) override { throw NotLiteral(); } - void pre(const ComponentBoundary &) override { throw NotLiteral(); } - void pre(const ComponentByte &) override { throw NotLiteral(); } - void pre(const ComponentCondReference &) override { throw NotLiteral(); } - void pre(const ComponentEmpty &) override { throw NotLiteral(); } - void pre(const ComponentEUS &) override { throw NotLiteral(); } - void pre(const ComponentWordBoundary &) override { throw NotLiteral(); } - void pre(const UTF8ComponentClass &) override { throw NotLiteral(); } - - void during(const AsciiComponentClass &) override {} - void during(const ComponentAlternation &) override {} - void during(const ComponentAssertion &) override {} - void during(const ComponentAtomicGroup &) override {} - void during(const ComponentBackReference &) override {} - void during(const ComponentBoundary &) override {} - void during(const ComponentByte &) override {} - void during(const ComponentCondReference &) override {} - void during(const ComponentEmpty &) override {} - void during(const ComponentEUS &) override {} - void during(const ComponentRepeat &) override {} - void during(const ComponentSequence &) override {} - void during(const ComponentWordBoundary &) override {} - void during(const UTF8ComponentClass &) override {} - - void post(const AsciiComponentClass &) override {} - void post(const ComponentAlternation &) override {} - void post(const ComponentAssertion &) override {} - void post(const ComponentAtomicGroup &) override {} - void post(const ComponentBackReference &) override {} - void post(const ComponentBoundary &) override {} - void post(const ComponentByte &) override {} - void post(const ComponentCondReference &) override {} - void post(const ComponentEmpty &) override {} - void post(const ComponentEUS &) override {} - void post(const ComponentSequence &) override {} - void post(const ComponentWordBoundary &) override {} - void post(const UTF8ComponentClass &) override {} - - ue2_literal lit; - stack<size_t> repeat_stack; //!< index of entry to repeat. -}; - -ConstructLiteralVisitor::~ConstructLiteralVisitor() {} - -/** \brief True if the literal expression \a expr could be added to Rose. */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shortcut literal pass: directly add literal components to Rose. + */ +#include "AsciiComponentClass.h" +#include "Utf8ComponentClass.h" +#include "ComponentAssertion.h" +#include "ComponentAtomicGroup.h" +#include "ComponentBackReference.h" +#include "ComponentBoundary.h" +#include "ComponentClass.h" +#include "ComponentCondReference.h" +#include "ComponentRepeat.h" +#include "ComponentSequence.h" +#include "ComponentVisitor.h" +#include "ComponentWordBoundary.h" +#include "ConstComponentVisitor.h" +#include "parse_error.h" +#include "shortcut_literal.h" +#include "grey.h" +#include "nfagraph/ng.h" +#include "compiler/compiler.h" +#include "util/ue2string.h" +#include "ue2common.h" + +#include <stack> + +using namespace std; + +namespace ue2 { + +/** + * \brief Visitor that constructs a ue2_literal from a component tree. + * + * If a component that can't be part of a literal is encountered, this visitor + * will throw ConstructLiteralVisitor::NotLiteral. + */ +class ConstructLiteralVisitor : public ConstComponentVisitor { +public: + ~ConstructLiteralVisitor() override; + + /** \brief Thrown if this component does not represent a literal. */ + struct NotLiteral {}; + + void pre(const AsciiComponentClass &c) override { + const CharReach &cr = c.cr; + const size_t width = cr.count(); + if (width == 1) { + lit.push_back(cr.find_first(), false); + } else if (width == 2 && cr.isCaselessChar()) { + lit.push_back(cr.find_first(), true); + } else { + throw NotLiteral(); + } + } + + void pre(const ComponentRepeat &c) override { + if (c.m_min == 0 || c.m_min != c.m_max) { + throw NotLiteral(); + } + + if (c.m_max < ComponentRepeat::NoLimit && c.m_max > 32767) { + throw ParseError("Bounded repeat is too large."); + } + + // Store the current length of the literal; in this repeat's post() + // call we will append N-1 more copies of [index..end]. + repeat_stack.push(lit.length()); + } + + void post(const ComponentRepeat &c) override { + // Add N-1 copies of the string between the entry to the repeat and the + // current end of the literal. + assert(!repeat_stack.empty()); + const ue2_literal suffix = lit.substr(repeat_stack.top()); + repeat_stack.pop(); + + for (unsigned i = 1; i < c.m_min; i++) { + lit += suffix; + } + } + + void pre(const ComponentSequence &) override { + // Pass through. + } + + void pre(const ComponentAlternation &) override { throw NotLiteral(); } + void pre(const ComponentAssertion &) override { throw NotLiteral(); } + void pre(const ComponentAtomicGroup &) override { throw NotLiteral(); } + void pre(const ComponentBackReference &) override { throw NotLiteral(); } + void pre(const ComponentBoundary &) override { throw NotLiteral(); } + void pre(const ComponentByte &) override { throw NotLiteral(); } + void pre(const ComponentCondReference &) override { throw NotLiteral(); } + void pre(const ComponentEmpty &) override { throw NotLiteral(); } + void pre(const ComponentEUS &) override { throw NotLiteral(); } + void pre(const ComponentWordBoundary &) override { throw NotLiteral(); } + void pre(const UTF8ComponentClass &) override { throw NotLiteral(); } + + void during(const AsciiComponentClass &) override {} + void during(const ComponentAlternation &) override {} + void during(const ComponentAssertion &) override {} + void during(const ComponentAtomicGroup &) override {} + void during(const ComponentBackReference &) override {} + void during(const ComponentBoundary &) override {} + void during(const ComponentByte &) override {} + void during(const ComponentCondReference &) override {} + void during(const ComponentEmpty &) override {} + void during(const ComponentEUS &) override {} + void during(const ComponentRepeat &) override {} + void during(const ComponentSequence &) override {} + void during(const ComponentWordBoundary &) override {} + void during(const UTF8ComponentClass &) override {} + + void post(const AsciiComponentClass &) override {} + void post(const ComponentAlternation &) override {} + void post(const ComponentAssertion &) override {} + void post(const ComponentAtomicGroup &) override {} + void post(const ComponentBackReference &) override {} + void post(const ComponentBoundary &) override {} + void post(const ComponentByte &) override {} + void post(const ComponentCondReference &) override {} + void post(const ComponentEmpty &) override {} + void post(const ComponentEUS &) override {} + void post(const ComponentSequence &) override {} + void post(const ComponentWordBoundary &) override {} + void post(const UTF8ComponentClass &) override {} + + ue2_literal lit; + stack<size_t> repeat_stack; //!< index of entry to repeat. +}; + +ConstructLiteralVisitor::~ConstructLiteralVisitor() {} + +/** \brief True if the literal expression \a expr could be added to Rose. */ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { assert(pe.component); - + if (!ng.cc.grey.allowLiteral) { - return false; - } - + return false; + } + const auto &expr = pe.expr; - // XXX: don't shortcut literals with extended params (yet) + // XXX: don't shortcut literals with extended params (yet) if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || expr.edit_distance || expr.hamm_distance) { - DEBUG_PRINTF("extended params not allowed\n"); - return false; - } - - ConstructLiteralVisitor vis; - try { + DEBUG_PRINTF("extended params not allowed\n"); + return false; + } + + ConstructLiteralVisitor vis; + try { assert(pe.component); pe.component->accept(vis); - assert(vis.repeat_stack.empty()); - } catch (const ConstructLiteralVisitor::NotLiteral&) { - DEBUG_PRINTF("not a literal\n"); - return false; - } - - const ue2_literal &lit = vis.lit; - - if (lit.empty()) { - DEBUG_PRINTF("empty literal\n"); - return false; - } - - if (expr.highlander && lit.length() <= 1) { - DEBUG_PRINTF("not shortcutting SEP literal\n"); - return false; - } - - DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); + assert(vis.repeat_stack.empty()); + } catch (const ConstructLiteralVisitor::NotLiteral&) { + DEBUG_PRINTF("not a literal\n"); + return false; + } + + const ue2_literal &lit = vis.lit; + + if (lit.empty()) { + DEBUG_PRINTF("empty literal\n"); + return false; + } + + if (expr.highlander && lit.length() <= 1) { + DEBUG_PRINTF("not shortcutting SEP literal\n"); + return false; + } + + DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, expr.som, expr.quiet); -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/shortcut_literal.h b/contrib/libs/hyperscan/src/parser/shortcut_literal.h index 2129475211..60cffd00ed 100644 --- a/contrib/libs/hyperscan/src/parser/shortcut_literal.h +++ b/contrib/libs/hyperscan/src/parser/shortcut_literal.h @@ -1,46 +1,46 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shortcut literal pass: directly add literal components to Rose. - */ - -#ifndef SHORTCUT_LITERAL_H -#define SHORTCUT_LITERAL_H - -namespace ue2 { - -class NG; -class ParsedExpression; - -/** \brief True if the literal expression \a expr could be added to Rose. */ -bool shortcutLiteral(NG &ng, const ParsedExpression &expr); - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shortcut literal pass: directly add literal components to Rose. + */ + +#ifndef SHORTCUT_LITERAL_H +#define SHORTCUT_LITERAL_H + +namespace ue2 { + +class NG; +class ParsedExpression; + +/** \brief True if the literal expression \a expr could be added to Rose. */ +bool shortcutLiteral(NG &ng, const ParsedExpression &expr); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/parser/ucp_table.cpp b/contrib/libs/hyperscan/src/parser/ucp_table.cpp index cdc683a607..fc1330fe7f 100644 --- a/contrib/libs/hyperscan/src/parser/ucp_table.cpp +++ b/contrib/libs/hyperscan/src/parser/ucp_table.cpp @@ -1,132 +1,132 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "Utf8ComponentClass.h" - -#include <algorithm> - -using namespace std; - -namespace ue2 { - -#define UCP_FN(cat) \ -CodePointSet getUcp##cat(void) { \ - CodePointSet rv; \ - for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \ - rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \ - } \ - return rv; \ -} - -struct unicase { - unichar base; - unichar caseless; -}; - -} // namespace ue2 - -#define UCP_TABLE_DEFINE_FN -#include "ucp_table.h" - -namespace ue2 { - -static -bool operator<(const unicase &a, const unicase &b) { - if (a.base < b.base) { - return true; - } - - if (a.base > b.base) { - return false; - } - - return a.caseless < b.caseless; -} - -void make_caseless(CodePointSet *cps) { - assert(cps); - DEBUG_PRINTF("hello\n"); - // Cheap optimisation: if we are empty or a dot, we're already caseless. - if (cps->begin() == cps->end()) { - DEBUG_PRINTF("empty\n"); - return; - } - if (lower(*cps->begin()) == 0 && upper(*cps->begin()) == MAX_UNICODE) { - DEBUG_PRINTF("dot\n"); - return; - } - - CodePointSet base = *cps; - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "Utf8ComponentClass.h" + +#include <algorithm> + +using namespace std; + +namespace ue2 { + +#define UCP_FN(cat) \ +CodePointSet getUcp##cat(void) { \ + CodePointSet rv; \ + for (u32 i = 0; i < ARRAY_LENGTH(ucp_##cat##_def); i += 2) { \ + rv.setRange(ucp_##cat##_def[i], ucp_##cat##_def[i + 1]); \ + } \ + return rv; \ +} + +struct unicase { + unichar base; + unichar caseless; +}; + +} // namespace ue2 + +#define UCP_TABLE_DEFINE_FN +#include "ucp_table.h" + +namespace ue2 { + +static +bool operator<(const unicase &a, const unicase &b) { + if (a.base < b.base) { + return true; + } + + if (a.base > b.base) { + return false; + } + + return a.caseless < b.caseless; +} + +void make_caseless(CodePointSet *cps) { + assert(cps); + DEBUG_PRINTF("hello\n"); + // Cheap optimisation: if we are empty or a dot, we're already caseless. + if (cps->begin() == cps->end()) { + DEBUG_PRINTF("empty\n"); + return; + } + if (lower(*cps->begin()) == 0 && upper(*cps->begin()) == MAX_UNICODE) { + DEBUG_PRINTF("dot\n"); + return; + } + + CodePointSet base = *cps; + auto uc_begin = begin(ucp_caseless_def); auto uc_end = end(ucp_caseless_def); DEBUG_PRINTF("uc len %zd\n", distance(uc_begin, uc_end)); - + for (const auto &elem : base) { unichar b = lower(elem); unichar e = upper(elem) + 1; - - for (; b < e; b++) { - DEBUG_PRINTF("decasing %x\n", b); - unicase test = {b, 0}; /* NUL is not a caseless version of anything, - * so we are ok */ - uc_begin = lower_bound(uc_begin, uc_end, test); - if (uc_begin == uc_end) { - DEBUG_PRINTF("EOL\n"); - return; - } + + for (; b < e; b++) { + DEBUG_PRINTF("decasing %x\n", b); + unicase test = {b, 0}; /* NUL is not a caseless version of anything, + * so we are ok */ + uc_begin = lower_bound(uc_begin, uc_end, test); + if (uc_begin == uc_end) { + DEBUG_PRINTF("EOL\n"); + return; + } while (uc_begin != uc_end && uc_begin->base == b) { - DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless); - cps->set(uc_begin->caseless); - ++uc_begin; - } - } - } -} - -/** \brief Flip the case of the codepoint in c, if possible. - * - * Note that this assumes a one-to-one case mapping, which (though not - * realistic) is what PCRE does. */ -bool flip_case(unichar *c) { - assert(c); - + DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless); + cps->set(uc_begin->caseless); + ++uc_begin; + } + } + } +} + +/** \brief Flip the case of the codepoint in c, if possible. + * + * Note that this assumes a one-to-one case mapping, which (though not + * realistic) is what PCRE does. */ +bool flip_case(unichar *c) { + assert(c); + const unicase test = { *c, 0 }; - + const auto uc_begin = begin(ucp_caseless_def); const auto uc_end = end(ucp_caseless_def); const auto f = lower_bound(uc_begin, uc_end, test); if (f != uc_end && f->base == *c) { - DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless); - *c = f->caseless; - return true; - } - return false; -} - -} // namespace ue2 + DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless); + *c = f->caseless; + return true; + } + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/ucp_table.h b/contrib/libs/hyperscan/src/parser/ucp_table.h index d420db2b5d..269a971cb1 100644 --- a/contrib/libs/hyperscan/src/parser/ucp_table.h +++ b/contrib/libs/hyperscan/src/parser/ucp_table.h @@ -1,11043 +1,11043 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef UCP_TABLE_H -#define UCP_TABLE_H - -/* Generated by tools/scripts/ucp.py based on unicode database - * - * Do not hand edit - */ - -namespace ue2 { - -class CodePointSet; -void make_caseless(CodePointSet *cps); -bool flip_case(unichar *c); - -CodePointSet getUcpC(void); -CodePointSet getUcpCc(void); -CodePointSet getUcpCf(void); -CodePointSet getUcpCn(void); -CodePointSet getUcpCo(void); -CodePointSet getUcpCs(void); -CodePointSet getUcpL(void); -CodePointSet getUcpL_and(void); -CodePointSet getUcpLl(void); -CodePointSet getUcpLm(void); -CodePointSet getUcpLo(void); -CodePointSet getUcpLt(void); -CodePointSet getUcpLu(void); -CodePointSet getUcpM(void); -CodePointSet getUcpMc(void); -CodePointSet getUcpMe(void); -CodePointSet getUcpMn(void); -CodePointSet getUcpN(void); -CodePointSet getUcpNd(void); -CodePointSet getUcpNl(void); -CodePointSet getUcpNo(void); -CodePointSet getUcpP(void); -CodePointSet getUcpPc(void); -CodePointSet getUcpPd(void); -CodePointSet getUcpPe(void); -CodePointSet getUcpPf(void); -CodePointSet getUcpPi(void); -CodePointSet getUcpPo(void); -CodePointSet getUcpPs(void); -CodePointSet getUcpS(void); -CodePointSet getUcpSc(void); -CodePointSet getUcpSk(void); -CodePointSet getUcpSm(void); -CodePointSet getUcpSo(void); -CodePointSet getUcpXan(void); -CodePointSet getUcpXps(void); -CodePointSet getUcpXsp(void); -CodePointSet getUcpXwd(void); -CodePointSet getUcpZ(void); -CodePointSet getUcpZl(void); -CodePointSet getUcpZp(void); -CodePointSet getUcpZs(void); -CodePointSet getUcpArabic(void); -CodePointSet getUcpArmenian(void); -CodePointSet getUcpAvestan(void); -CodePointSet getUcpBalinese(void); -CodePointSet getUcpBamum(void); -CodePointSet getUcpBassa_Vah(void); -CodePointSet getUcpBatak(void); -CodePointSet getUcpBengali(void); -CodePointSet getUcpBopomofo(void); -CodePointSet getUcpBrahmi(void); -CodePointSet getUcpBraille(void); -CodePointSet getUcpBuginese(void); -CodePointSet getUcpBuhid(void); -CodePointSet getUcpCanadian_Aboriginal(void); -CodePointSet getUcpCarian(void); -CodePointSet getUcpCaucasian_Albanian(void); -CodePointSet getUcpChakma(void); -CodePointSet getUcpCham(void); -CodePointSet getUcpCherokee(void); -CodePointSet getUcpCommon(void); -CodePointSet getUcpCoptic(void); -CodePointSet getUcpCuneiform(void); -CodePointSet getUcpCypriot(void); -CodePointSet getUcpCyrillic(void); -CodePointSet getUcpDeseret(void); -CodePointSet getUcpDevanagari(void); -CodePointSet getUcpDuployan(void); -CodePointSet getUcpEgyptian_Hieroglyphs(void); -CodePointSet getUcpElbasan(void); -CodePointSet getUcpEthiopic(void); -CodePointSet getUcpGeorgian(void); -CodePointSet getUcpGlagolitic(void); -CodePointSet getUcpGothic(void); -CodePointSet getUcpGrantha(void); -CodePointSet getUcpGreek(void); -CodePointSet getUcpGujarati(void); -CodePointSet getUcpGurmukhi(void); -CodePointSet getUcpHan(void); -CodePointSet getUcpHangul(void); -CodePointSet getUcpHanunoo(void); -CodePointSet getUcpHebrew(void); -CodePointSet getUcpHiragana(void); -CodePointSet getUcpImperial_Aramaic(void); -CodePointSet getUcpInherited(void); -CodePointSet getUcpInscriptional_Pahlavi(void); -CodePointSet getUcpInscriptional_Parthian(void); -CodePointSet getUcpJavanese(void); -CodePointSet getUcpKaithi(void); -CodePointSet getUcpKannada(void); -CodePointSet getUcpKatakana(void); -CodePointSet getUcpKayah_Li(void); -CodePointSet getUcpKharoshthi(void); -CodePointSet getUcpKhmer(void); -CodePointSet getUcpKhojki(void); -CodePointSet getUcpKhudawadi(void); -CodePointSet getUcpLao(void); -CodePointSet getUcpLatin(void); -CodePointSet getUcpLepcha(void); -CodePointSet getUcpLimbu(void); -CodePointSet getUcpLinear_A(void); -CodePointSet getUcpLinear_B(void); -CodePointSet getUcpLisu(void); -CodePointSet getUcpLycian(void); -CodePointSet getUcpLydian(void); -CodePointSet getUcpMahajani(void); -CodePointSet getUcpMalayalam(void); -CodePointSet getUcpMandaic(void); -CodePointSet getUcpManichaean(void); -CodePointSet getUcpMeetei_Mayek(void); -CodePointSet getUcpMende_Kikakui(void); -CodePointSet getUcpMeroitic_Cursive(void); -CodePointSet getUcpMeroitic_Hieroglyphs(void); -CodePointSet getUcpMiao(void); -CodePointSet getUcpModi(void); -CodePointSet getUcpMongolian(void); -CodePointSet getUcpMro(void); -CodePointSet getUcpMyanmar(void); -CodePointSet getUcpNabataean(void); -CodePointSet getUcpNew_Tai_Lue(void); -CodePointSet getUcpNko(void); -CodePointSet getUcpOgham(void); -CodePointSet getUcpOl_Chiki(void); -CodePointSet getUcpOld_Italic(void); -CodePointSet getUcpOld_North_Arabian(void); -CodePointSet getUcpOld_Permic(void); -CodePointSet getUcpOld_Persian(void); -CodePointSet getUcpOld_South_Arabian(void); -CodePointSet getUcpOld_Turkic(void); -CodePointSet getUcpOriya(void); -CodePointSet getUcpOsmanya(void); -CodePointSet getUcpPahawh_Hmong(void); -CodePointSet getUcpPalmyrene(void); -CodePointSet getUcpPau_Cin_Hau(void); -CodePointSet getUcpPhags_Pa(void); -CodePointSet getUcpPhoenician(void); -CodePointSet getUcpPsalter_Pahlavi(void); -CodePointSet getUcpRejang(void); -CodePointSet getUcpRunic(void); -CodePointSet getUcpSamaritan(void); -CodePointSet getUcpSaurashtra(void); -CodePointSet getUcpSharada(void); -CodePointSet getUcpShavian(void); -CodePointSet getUcpSiddham(void); -CodePointSet getUcpSinhala(void); -CodePointSet getUcpSora_Sompeng(void); -CodePointSet getUcpSundanese(void); -CodePointSet getUcpSyloti_Nagri(void); -CodePointSet getUcpSyriac(void); -CodePointSet getUcpTagalog(void); -CodePointSet getUcpTagbanwa(void); -CodePointSet getUcpTai_Le(void); -CodePointSet getUcpTai_Tham(void); -CodePointSet getUcpTai_Viet(void); -CodePointSet getUcpTakri(void); -CodePointSet getUcpTamil(void); -CodePointSet getUcpTelugu(void); -CodePointSet getUcpThaana(void); -CodePointSet getUcpThai(void); -CodePointSet getUcpTibetan(void); -CodePointSet getUcpTifinagh(void); -CodePointSet getUcpTirhuta(void); -CodePointSet getUcpUgaritic(void); -CodePointSet getUcpVai(void); -CodePointSet getUcpWarang_Citi(void); -CodePointSet getUcpYi(void); - -#ifdef UCP_TABLE_DEFINE_FN - -static const unichar ucp_C_def[] = { - 0x0, 0x1f, - 0x7f, 0x9f, - 0xad, 0xad, - 0x378, 0x379, - 0x380, 0x383, - 0x38b, 0x38b, - 0x38d, 0x38d, - 0x3a2, 0x3a2, - 0x530, 0x530, - 0x557, 0x558, - 0x560, 0x560, - 0x588, 0x588, - 0x58b, 0x58c, - 0x590, 0x590, - 0x5c8, 0x5cf, - 0x5eb, 0x5ef, - 0x5f5, 0x605, - 0x61c, 0x61d, - 0x6dd, 0x6dd, - 0x70e, 0x70f, - 0x74b, 0x74c, - 0x7b2, 0x7bf, - 0x7fb, 0x7ff, - 0x82e, 0x82f, - 0x83f, 0x83f, - 0x85c, 0x85d, - 0x85f, 0x89f, - 0x8b3, 0x8e3, - 0x984, 0x984, - 0x98d, 0x98e, - 0x991, 0x992, - 0x9a9, 0x9a9, - 0x9b1, 0x9b1, - 0x9b3, 0x9b5, - 0x9ba, 0x9bb, - 0x9c5, 0x9c6, - 0x9c9, 0x9ca, - 0x9cf, 0x9d6, - 0x9d8, 0x9db, - 0x9de, 0x9de, - 0x9e4, 0x9e5, - 0x9fc, 0xa00, - 0xa04, 0xa04, - 0xa0b, 0xa0e, - 0xa11, 0xa12, - 0xa29, 0xa29, - 0xa31, 0xa31, - 0xa34, 0xa34, - 0xa37, 0xa37, - 0xa3a, 0xa3b, - 0xa3d, 0xa3d, - 0xa43, 0xa46, - 0xa49, 0xa4a, - 0xa4e, 0xa50, - 0xa52, 0xa58, - 0xa5d, 0xa5d, - 0xa5f, 0xa65, - 0xa76, 0xa80, - 0xa84, 0xa84, - 0xa8e, 0xa8e, - 0xa92, 0xa92, - 0xaa9, 0xaa9, - 0xab1, 0xab1, - 0xab4, 0xab4, - 0xaba, 0xabb, - 0xac6, 0xac6, - 0xaca, 0xaca, - 0xace, 0xacf, - 0xad1, 0xadf, - 0xae4, 0xae5, - 0xaf2, 0xb00, - 0xb04, 0xb04, - 0xb0d, 0xb0e, - 0xb11, 0xb12, - 0xb29, 0xb29, - 0xb31, 0xb31, - 0xb34, 0xb34, - 0xb3a, 0xb3b, - 0xb45, 0xb46, - 0xb49, 0xb4a, - 0xb4e, 0xb55, - 0xb58, 0xb5b, - 0xb5e, 0xb5e, - 0xb64, 0xb65, - 0xb78, 0xb81, - 0xb84, 0xb84, - 0xb8b, 0xb8d, - 0xb91, 0xb91, - 0xb96, 0xb98, - 0xb9b, 0xb9b, - 0xb9d, 0xb9d, - 0xba0, 0xba2, - 0xba5, 0xba7, - 0xbab, 0xbad, - 0xbba, 0xbbd, - 0xbc3, 0xbc5, - 0xbc9, 0xbc9, - 0xbce, 0xbcf, - 0xbd1, 0xbd6, - 0xbd8, 0xbe5, - 0xbfb, 0xbff, - 0xc04, 0xc04, - 0xc0d, 0xc0d, - 0xc11, 0xc11, - 0xc29, 0xc29, - 0xc3a, 0xc3c, - 0xc45, 0xc45, - 0xc49, 0xc49, - 0xc4e, 0xc54, - 0xc57, 0xc57, - 0xc5a, 0xc5f, - 0xc64, 0xc65, - 0xc70, 0xc77, - 0xc80, 0xc80, - 0xc84, 0xc84, - 0xc8d, 0xc8d, - 0xc91, 0xc91, - 0xca9, 0xca9, - 0xcb4, 0xcb4, - 0xcba, 0xcbb, - 0xcc5, 0xcc5, - 0xcc9, 0xcc9, - 0xcce, 0xcd4, - 0xcd7, 0xcdd, - 0xcdf, 0xcdf, - 0xce4, 0xce5, - 0xcf0, 0xcf0, - 0xcf3, 0xd00, - 0xd04, 0xd04, - 0xd0d, 0xd0d, - 0xd11, 0xd11, - 0xd3b, 0xd3c, - 0xd45, 0xd45, - 0xd49, 0xd49, - 0xd4f, 0xd56, - 0xd58, 0xd5f, - 0xd64, 0xd65, - 0xd76, 0xd78, - 0xd80, 0xd81, - 0xd84, 0xd84, - 0xd97, 0xd99, - 0xdb2, 0xdb2, - 0xdbc, 0xdbc, - 0xdbe, 0xdbf, - 0xdc7, 0xdc9, - 0xdcb, 0xdce, - 0xdd5, 0xdd5, - 0xdd7, 0xdd7, - 0xde0, 0xde5, - 0xdf0, 0xdf1, - 0xdf5, 0xe00, - 0xe3b, 0xe3e, - 0xe5c, 0xe80, - 0xe83, 0xe83, - 0xe85, 0xe86, - 0xe89, 0xe89, - 0xe8b, 0xe8c, - 0xe8e, 0xe93, - 0xe98, 0xe98, - 0xea0, 0xea0, - 0xea4, 0xea4, - 0xea6, 0xea6, - 0xea8, 0xea9, - 0xeac, 0xeac, - 0xeba, 0xeba, - 0xebe, 0xebf, - 0xec5, 0xec5, - 0xec7, 0xec7, - 0xece, 0xecf, - 0xeda, 0xedb, - 0xee0, 0xeff, - 0xf48, 0xf48, - 0xf6d, 0xf70, - 0xf98, 0xf98, - 0xfbd, 0xfbd, - 0xfcd, 0xfcd, - 0xfdb, 0xfff, - 0x10c6, 0x10c6, - 0x10c8, 0x10cc, - 0x10ce, 0x10cf, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 0x1317, - 0x135b, 0x135c, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x13ff, - 0x169d, 0x169f, - 0x16f9, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 0x177f, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180e, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18ab, 0x18af, - 0x18f6, 0x18ff, - 0x191f, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19ac, 0x19af, - 0x19ca, 0x19cf, - 0x19db, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a5f, 0x1a5f, - 0x1a7d, 0x1a7e, - 0x1a8a, 0x1a8f, - 0x1a9a, 0x1a9f, - 0x1aae, 0x1aaf, - 0x1abf, 0x1aff, - 0x1b4c, 0x1b4f, - 0x1b7d, 0x1b7f, - 0x1bf4, 0x1bfb, - 0x1c38, 0x1c3a, - 0x1c4a, 0x1c4c, - 0x1c80, 0x1cbf, - 0x1cc8, 0x1ccf, - 0x1cf7, 0x1cf7, - 0x1cfa, 0x1cff, - 0x1df6, 0x1dfb, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x206f, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x209d, 0x209f, - 0x20be, 0x20cf, - 0x20f1, 0x20ff, - 0x218a, 0x218f, - 0x23fb, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x2b74, 0x2b75, - 0x2b96, 0x2b97, - 0x2bba, 0x2bbc, - 0x2bc9, 0x2bc9, - 0x2bd2, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c5f, - 0x2cf4, 0x2cf8, - 0x2d26, 0x2d26, - 0x2d28, 0x2d2c, - 0x2d2e, 0x2d2f, - 0x2d68, 0x2d6e, - 0x2d71, 0x2d7e, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2ddf, - 0x2e43, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312e, 0x3130, - 0x318f, 0x318f, - 0x31bb, 0x31bf, - 0x31e4, 0x31ef, - 0x321f, 0x321f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fcd, 0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa4cf, - 0xa62c, 0xa63f, - 0xa69e, 0xa69e, - 0xa6f8, 0xa6ff, - 0xa78f, 0xa78f, - 0xa7ae, 0xa7af, - 0xa7b2, 0xa7f6, - 0xa82c, 0xa82f, - 0xa83a, 0xa83f, - 0xa878, 0xa87f, - 0xa8c5, 0xa8cd, - 0xa8da, 0xa8df, - 0xa8fc, 0xa8ff, - 0xa954, 0xa95e, - 0xa97d, 0xa97f, - 0xa9ce, 0xa9ce, - 0xa9da, 0xa9dd, - 0xa9ff, 0xa9ff, - 0xaa37, 0xaa3f, - 0xaa4e, 0xaa4f, - 0xaa5a, 0xaa5b, - 0xaac3, 0xaada, - 0xaaf7, 0xab00, - 0xab07, 0xab08, - 0xab0f, 0xab10, - 0xab17, 0xab1f, - 0xab27, 0xab27, - 0xab2f, 0xab2f, - 0xab60, 0xab63, - 0xab66, 0xabbf, - 0xabee, 0xabef, - 0xabfa, 0xabff, - 0xd7a4, 0xd7af, - 0xd7c7, 0xd7ca, - 0xd7fc, 0xf8ff, - 0xfa6e, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbc2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe2e, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfffb, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018d, 0x1018f, - 0x1019c, 0x1019f, - 0x101a1, 0x101cf, - 0x101fe, 0x1027f, - 0x1029d, 0x1029f, - 0x102d1, 0x102df, - 0x102fc, 0x102ff, - 0x10324, 0x1032f, - 0x1034b, 0x1034f, - 0x1037b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x104ff, - 0x10528, 0x1052f, - 0x10564, 0x1056e, - 0x10570, 0x105ff, - 0x10737, 0x1073f, - 0x10756, 0x1075f, - 0x10768, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10856, 0x10856, - 0x1089f, 0x108a6, - 0x108b0, 0x108ff, - 0x1091c, 0x1091e, - 0x1093a, 0x1093e, - 0x10940, 0x1097f, - 0x109b8, 0x109bd, - 0x109c0, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x10a5f, - 0x10aa0, 0x10abf, - 0x10ae7, 0x10aea, - 0x10af7, 0x10aff, - 0x10b36, 0x10b38, - 0x10b56, 0x10b57, - 0x10b73, 0x10b77, - 0x10b92, 0x10b98, - 0x10b9d, 0x10ba8, - 0x10bb0, 0x10bff, - 0x10c49, 0x10e5f, - 0x10e7f, 0x10fff, - 0x1104e, 0x11051, - 0x11070, 0x1107e, - 0x110bd, 0x110bd, - 0x110c2, 0x110cf, - 0x110e9, 0x110ef, - 0x110fa, 0x110ff, - 0x11135, 0x11135, - 0x11144, 0x1114f, - 0x11177, 0x1117f, - 0x111c9, 0x111cc, - 0x111ce, 0x111cf, - 0x111db, 0x111e0, - 0x111f5, 0x111ff, - 0x11212, 0x11212, - 0x1123e, 0x112af, - 0x112eb, 0x112ef, - 0x112fa, 0x11300, - 0x11304, 0x11304, - 0x1130d, 0x1130e, - 0x11311, 0x11312, - 0x11329, 0x11329, - 0x11331, 0x11331, - 0x11334, 0x11334, - 0x1133a, 0x1133b, - 0x11345, 0x11346, - 0x11349, 0x1134a, - 0x1134e, 0x11356, - 0x11358, 0x1135c, - 0x11364, 0x11365, - 0x1136d, 0x1136f, - 0x11375, 0x1147f, - 0x114c8, 0x114cf, - 0x114da, 0x1157f, - 0x115b6, 0x115b7, - 0x115ca, 0x115ff, - 0x11645, 0x1164f, - 0x1165a, 0x1167f, - 0x116b8, 0x116bf, - 0x116ca, 0x1189f, - 0x118f3, 0x118fe, - 0x11900, 0x11abf, - 0x11af9, 0x11fff, - 0x12399, 0x123ff, - 0x1246f, 0x1246f, - 0x12475, 0x12fff, - 0x1342f, 0x167ff, - 0x16a39, 0x16a3f, - 0x16a5f, 0x16a5f, - 0x16a6a, 0x16a6d, - 0x16a70, 0x16acf, - 0x16aee, 0x16aef, - 0x16af6, 0x16aff, - 0x16b46, 0x16b4f, - 0x16b5a, 0x16b5a, - 0x16b62, 0x16b62, - 0x16b78, 0x16b7c, - 0x16b90, 0x16eff, - 0x16f45, 0x16f4f, - 0x16f7f, 0x16f8e, - 0x16fa0, 0x1afff, - 0x1b002, 0x1bbff, - 0x1bc6b, 0x1bc6f, - 0x1bc7d, 0x1bc7f, - 0x1bc89, 0x1bc8f, - 0x1bc9a, 0x1bc9b, - 0x1bca0, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d128, - 0x1d173, 0x1d17a, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d35f, - 0x1d372, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7cc, 0x1d7cd, - 0x1d800, 0x1e7ff, - 0x1e8c5, 0x1e8c6, - 0x1e8d7, 0x1edff, - 0x1ee04, 0x1ee04, - 0x1ee20, 0x1ee20, - 0x1ee23, 0x1ee23, - 0x1ee25, 0x1ee26, - 0x1ee28, 0x1ee28, - 0x1ee33, 0x1ee33, - 0x1ee38, 0x1ee38, - 0x1ee3a, 0x1ee3a, - 0x1ee3c, 0x1ee41, - 0x1ee43, 0x1ee46, - 0x1ee48, 0x1ee48, - 0x1ee4a, 0x1ee4a, - 0x1ee4c, 0x1ee4c, - 0x1ee50, 0x1ee50, - 0x1ee53, 0x1ee53, - 0x1ee55, 0x1ee56, - 0x1ee58, 0x1ee58, - 0x1ee5a, 0x1ee5a, - 0x1ee5c, 0x1ee5c, - 0x1ee5e, 0x1ee5e, - 0x1ee60, 0x1ee60, - 0x1ee63, 0x1ee63, - 0x1ee65, 0x1ee66, - 0x1ee6b, 0x1ee6b, - 0x1ee73, 0x1ee73, - 0x1ee78, 0x1ee78, - 0x1ee7d, 0x1ee7d, - 0x1ee7f, 0x1ee7f, - 0x1ee8a, 0x1ee8a, - 0x1ee9c, 0x1eea0, - 0x1eea4, 0x1eea4, - 0x1eeaa, 0x1eeaa, - 0x1eebc, 0x1eeef, - 0x1eef2, 0x1efff, - 0x1f02c, 0x1f02f, - 0x1f094, 0x1f09f, - 0x1f0af, 0x1f0b0, - 0x1f0c0, 0x1f0c0, - 0x1f0d0, 0x1f0d0, - 0x1f0f6, 0x1f0ff, - 0x1f10d, 0x1f10f, - 0x1f12f, 0x1f12f, - 0x1f16c, 0x1f16f, - 0x1f19b, 0x1f1e5, - 0x1f203, 0x1f20f, - 0x1f23b, 0x1f23f, - 0x1f249, 0x1f24f, - 0x1f252, 0x1f2ff, - 0x1f32d, 0x1f32f, - 0x1f37e, 0x1f37f, - 0x1f3cf, 0x1f3d3, - 0x1f3f8, 0x1f3ff, - 0x1f4ff, 0x1f4ff, - 0x1f54b, 0x1f54f, - 0x1f57a, 0x1f57a, - 0x1f5a4, 0x1f5a4, - 0x1f643, 0x1f644, - 0x1f6d0, 0x1f6df, - 0x1f6ed, 0x1f6ef, - 0x1f6f4, 0x1f6ff, - 0x1f774, 0x1f77f, - 0x1f7d5, 0x1f7ff, - 0x1f80c, 0x1f80f, - 0x1f848, 0x1f84f, - 0x1f85a, 0x1f85f, - 0x1f888, 0x1f88f, - 0x1f8ae, 0x1ffff, - 0x2a6d7, 0x2a6ff, - 0x2b735, 0x2b73f, - 0x2b81e, 0x2f7ff, - 0x2fa1e, 0xe00ff, - 0xe01f0, 0x10ffff -}; -UCP_FN(C) - -static const unichar ucp_Cc_def[] = { - 0x0, 0x1f, - 0x7f, 0x9f -}; -UCP_FN(Cc) - -static const unichar ucp_Cf_def[] = { - 0xad, 0xad, - 0x600, 0x605, - 0x61c, 0x61c, - 0x6dd, 0x6dd, - 0x70f, 0x70f, - 0x180e, 0x180e, - 0x200b, 0x200f, - 0x202a, 0x202e, - 0x2060, 0x2064, - 0x2066, 0x206f, - 0xfeff, 0xfeff, - 0xfff9, 0xfffb, - 0x110bd, 0x110bd, - 0x1bca0, 0x1bca3, - 0x1d173, 0x1d17a, - 0xe0001, 0xe0001, - 0xe0020, 0xe007f -}; -UCP_FN(Cf) - -static const unichar ucp_Cn_def[] = { - 0x378, 0x379, - 0x380, 0x383, - 0x38b, 0x38b, - 0x38d, 0x38d, - 0x3a2, 0x3a2, - 0x530, 0x530, - 0x557, 0x558, - 0x560, 0x560, - 0x588, 0x588, - 0x58b, 0x58c, - 0x590, 0x590, - 0x5c8, 0x5cf, - 0x5eb, 0x5ef, - 0x5f5, 0x5ff, - 0x61d, 0x61d, - 0x70e, 0x70e, - 0x74b, 0x74c, - 0x7b2, 0x7bf, - 0x7fb, 0x7ff, - 0x82e, 0x82f, - 0x83f, 0x83f, - 0x85c, 0x85d, - 0x85f, 0x89f, - 0x8b3, 0x8e3, - 0x984, 0x984, - 0x98d, 0x98e, - 0x991, 0x992, - 0x9a9, 0x9a9, - 0x9b1, 0x9b1, - 0x9b3, 0x9b5, - 0x9ba, 0x9bb, - 0x9c5, 0x9c6, - 0x9c9, 0x9ca, - 0x9cf, 0x9d6, - 0x9d8, 0x9db, - 0x9de, 0x9de, - 0x9e4, 0x9e5, - 0x9fc, 0xa00, - 0xa04, 0xa04, - 0xa0b, 0xa0e, - 0xa11, 0xa12, - 0xa29, 0xa29, - 0xa31, 0xa31, - 0xa34, 0xa34, - 0xa37, 0xa37, - 0xa3a, 0xa3b, - 0xa3d, 0xa3d, - 0xa43, 0xa46, - 0xa49, 0xa4a, - 0xa4e, 0xa50, - 0xa52, 0xa58, - 0xa5d, 0xa5d, - 0xa5f, 0xa65, - 0xa76, 0xa80, - 0xa84, 0xa84, - 0xa8e, 0xa8e, - 0xa92, 0xa92, - 0xaa9, 0xaa9, - 0xab1, 0xab1, - 0xab4, 0xab4, - 0xaba, 0xabb, - 0xac6, 0xac6, - 0xaca, 0xaca, - 0xace, 0xacf, - 0xad1, 0xadf, - 0xae4, 0xae5, - 0xaf2, 0xb00, - 0xb04, 0xb04, - 0xb0d, 0xb0e, - 0xb11, 0xb12, - 0xb29, 0xb29, - 0xb31, 0xb31, - 0xb34, 0xb34, - 0xb3a, 0xb3b, - 0xb45, 0xb46, - 0xb49, 0xb4a, - 0xb4e, 0xb55, - 0xb58, 0xb5b, - 0xb5e, 0xb5e, - 0xb64, 0xb65, - 0xb78, 0xb81, - 0xb84, 0xb84, - 0xb8b, 0xb8d, - 0xb91, 0xb91, - 0xb96, 0xb98, - 0xb9b, 0xb9b, - 0xb9d, 0xb9d, - 0xba0, 0xba2, - 0xba5, 0xba7, - 0xbab, 0xbad, - 0xbba, 0xbbd, - 0xbc3, 0xbc5, - 0xbc9, 0xbc9, - 0xbce, 0xbcf, - 0xbd1, 0xbd6, - 0xbd8, 0xbe5, - 0xbfb, 0xbff, - 0xc04, 0xc04, - 0xc0d, 0xc0d, - 0xc11, 0xc11, - 0xc29, 0xc29, - 0xc3a, 0xc3c, - 0xc45, 0xc45, - 0xc49, 0xc49, - 0xc4e, 0xc54, - 0xc57, 0xc57, - 0xc5a, 0xc5f, - 0xc64, 0xc65, - 0xc70, 0xc77, - 0xc80, 0xc80, - 0xc84, 0xc84, - 0xc8d, 0xc8d, - 0xc91, 0xc91, - 0xca9, 0xca9, - 0xcb4, 0xcb4, - 0xcba, 0xcbb, - 0xcc5, 0xcc5, - 0xcc9, 0xcc9, - 0xcce, 0xcd4, - 0xcd7, 0xcdd, - 0xcdf, 0xcdf, - 0xce4, 0xce5, - 0xcf0, 0xcf0, - 0xcf3, 0xd00, - 0xd04, 0xd04, - 0xd0d, 0xd0d, - 0xd11, 0xd11, - 0xd3b, 0xd3c, - 0xd45, 0xd45, - 0xd49, 0xd49, - 0xd4f, 0xd56, - 0xd58, 0xd5f, - 0xd64, 0xd65, - 0xd76, 0xd78, - 0xd80, 0xd81, - 0xd84, 0xd84, - 0xd97, 0xd99, - 0xdb2, 0xdb2, - 0xdbc, 0xdbc, - 0xdbe, 0xdbf, - 0xdc7, 0xdc9, - 0xdcb, 0xdce, - 0xdd5, 0xdd5, - 0xdd7, 0xdd7, - 0xde0, 0xde5, - 0xdf0, 0xdf1, - 0xdf5, 0xe00, - 0xe3b, 0xe3e, - 0xe5c, 0xe80, - 0xe83, 0xe83, - 0xe85, 0xe86, - 0xe89, 0xe89, - 0xe8b, 0xe8c, - 0xe8e, 0xe93, - 0xe98, 0xe98, - 0xea0, 0xea0, - 0xea4, 0xea4, - 0xea6, 0xea6, - 0xea8, 0xea9, - 0xeac, 0xeac, - 0xeba, 0xeba, - 0xebe, 0xebf, - 0xec5, 0xec5, - 0xec7, 0xec7, - 0xece, 0xecf, - 0xeda, 0xedb, - 0xee0, 0xeff, - 0xf48, 0xf48, - 0xf6d, 0xf70, - 0xf98, 0xf98, - 0xfbd, 0xfbd, - 0xfcd, 0xfcd, - 0xfdb, 0xfff, - 0x10c6, 0x10c6, - 0x10c8, 0x10cc, - 0x10ce, 0x10cf, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 0x1317, - 0x135b, 0x135c, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x13ff, - 0x169d, 0x169f, - 0x16f9, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1737, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 0x177f, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18ab, 0x18af, - 0x18f6, 0x18ff, - 0x191f, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19ac, 0x19af, - 0x19ca, 0x19cf, - 0x19db, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a5f, 0x1a5f, - 0x1a7d, 0x1a7e, - 0x1a8a, 0x1a8f, - 0x1a9a, 0x1a9f, - 0x1aae, 0x1aaf, - 0x1abf, 0x1aff, - 0x1b4c, 0x1b4f, - 0x1b7d, 0x1b7f, - 0x1bf4, 0x1bfb, - 0x1c38, 0x1c3a, - 0x1c4a, 0x1c4c, - 0x1c80, 0x1cbf, - 0x1cc8, 0x1ccf, - 0x1cf7, 0x1cf7, - 0x1cfa, 0x1cff, - 0x1df6, 0x1dfb, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x1fff, - 0x2065, 0x2065, - 0x2072, 0x2073, - 0x208f, 0x208f, - 0x209d, 0x209f, - 0x20be, 0x20cf, - 0x20f1, 0x20ff, - 0x218a, 0x218f, - 0x23fb, 0x23ff, - 0x2427, 0x243f, - 0x244b, 0x245f, - 0x2b74, 0x2b75, - 0x2b96, 0x2b97, - 0x2bba, 0x2bbc, - 0x2bc9, 0x2bc9, - 0x2bd2, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c5f, - 0x2cf4, 0x2cf8, - 0x2d26, 0x2d26, - 0x2d28, 0x2d2c, - 0x2d2e, 0x2d2f, - 0x2d68, 0x2d6e, - 0x2d71, 0x2d7e, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2ddf, - 0x2e43, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x2fef, - 0x2ffc, 0x2fff, - 0x3040, 0x3040, - 0x3097, 0x3098, - 0x3100, 0x3104, - 0x312e, 0x3130, - 0x318f, 0x318f, - 0x31bb, 0x31bf, - 0x31e4, 0x31ef, - 0x321f, 0x321f, - 0x32ff, 0x32ff, - 0x4db6, 0x4dbf, - 0x9fcd, 0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa4cf, - 0xa62c, 0xa63f, - 0xa69e, 0xa69e, - 0xa6f8, 0xa6ff, - 0xa78f, 0xa78f, - 0xa7ae, 0xa7af, - 0xa7b2, 0xa7f6, - 0xa82c, 0xa82f, - 0xa83a, 0xa83f, - 0xa878, 0xa87f, - 0xa8c5, 0xa8cd, - 0xa8da, 0xa8df, - 0xa8fc, 0xa8ff, - 0xa954, 0xa95e, - 0xa97d, 0xa97f, - 0xa9ce, 0xa9ce, - 0xa9da, 0xa9dd, - 0xa9ff, 0xa9ff, - 0xaa37, 0xaa3f, - 0xaa4e, 0xaa4f, - 0xaa5a, 0xaa5b, - 0xaac3, 0xaada, - 0xaaf7, 0xab00, - 0xab07, 0xab08, - 0xab0f, 0xab10, - 0xab17, 0xab1f, - 0xab27, 0xab27, - 0xab2f, 0xab2f, - 0xab60, 0xab63, - 0xab66, 0xabbf, - 0xabee, 0xabef, - 0xabfa, 0xabff, - 0xd7a4, 0xd7af, - 0xd7c7, 0xd7ca, - 0xd7fc, 0xd7ff, - 0xfa6e, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbc2, 0xfbd2, - 0xfd40, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe1a, 0xfe1f, - 0xfe2e, 0xfe2f, - 0xfe53, 0xfe53, - 0xfe67, 0xfe67, - 0xfe6c, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xfefe, - 0xff00, 0xff00, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffdf, - 0xffe7, 0xffe7, - 0xffef, 0xfff8, - 0xfffe, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x100ff, - 0x10103, 0x10106, - 0x10134, 0x10136, - 0x1018d, 0x1018f, - 0x1019c, 0x1019f, - 0x101a1, 0x101cf, - 0x101fe, 0x1027f, - 0x1029d, 0x1029f, - 0x102d1, 0x102df, - 0x102fc, 0x102ff, - 0x10324, 0x1032f, - 0x1034b, 0x1034f, - 0x1037b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x104ff, - 0x10528, 0x1052f, - 0x10564, 0x1056e, - 0x10570, 0x105ff, - 0x10737, 0x1073f, - 0x10756, 0x1075f, - 0x10768, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10856, 0x10856, - 0x1089f, 0x108a6, - 0x108b0, 0x108ff, - 0x1091c, 0x1091e, - 0x1093a, 0x1093e, - 0x10940, 0x1097f, - 0x109b8, 0x109bd, - 0x109c0, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x10a5f, - 0x10aa0, 0x10abf, - 0x10ae7, 0x10aea, - 0x10af7, 0x10aff, - 0x10b36, 0x10b38, - 0x10b56, 0x10b57, - 0x10b73, 0x10b77, - 0x10b92, 0x10b98, - 0x10b9d, 0x10ba8, - 0x10bb0, 0x10bff, - 0x10c49, 0x10e5f, - 0x10e7f, 0x10fff, - 0x1104e, 0x11051, - 0x11070, 0x1107e, - 0x110c2, 0x110cf, - 0x110e9, 0x110ef, - 0x110fa, 0x110ff, - 0x11135, 0x11135, - 0x11144, 0x1114f, - 0x11177, 0x1117f, - 0x111c9, 0x111cc, - 0x111ce, 0x111cf, - 0x111db, 0x111e0, - 0x111f5, 0x111ff, - 0x11212, 0x11212, - 0x1123e, 0x112af, - 0x112eb, 0x112ef, - 0x112fa, 0x11300, - 0x11304, 0x11304, - 0x1130d, 0x1130e, - 0x11311, 0x11312, - 0x11329, 0x11329, - 0x11331, 0x11331, - 0x11334, 0x11334, - 0x1133a, 0x1133b, - 0x11345, 0x11346, - 0x11349, 0x1134a, - 0x1134e, 0x11356, - 0x11358, 0x1135c, - 0x11364, 0x11365, - 0x1136d, 0x1136f, - 0x11375, 0x1147f, - 0x114c8, 0x114cf, - 0x114da, 0x1157f, - 0x115b6, 0x115b7, - 0x115ca, 0x115ff, - 0x11645, 0x1164f, - 0x1165a, 0x1167f, - 0x116b8, 0x116bf, - 0x116ca, 0x1189f, - 0x118f3, 0x118fe, - 0x11900, 0x11abf, - 0x11af9, 0x11fff, - 0x12399, 0x123ff, - 0x1246f, 0x1246f, - 0x12475, 0x12fff, - 0x1342f, 0x167ff, - 0x16a39, 0x16a3f, - 0x16a5f, 0x16a5f, - 0x16a6a, 0x16a6d, - 0x16a70, 0x16acf, - 0x16aee, 0x16aef, - 0x16af6, 0x16aff, - 0x16b46, 0x16b4f, - 0x16b5a, 0x16b5a, - 0x16b62, 0x16b62, - 0x16b78, 0x16b7c, - 0x16b90, 0x16eff, - 0x16f45, 0x16f4f, - 0x16f7f, 0x16f8e, - 0x16fa0, 0x1afff, - 0x1b002, 0x1bbff, - 0x1bc6b, 0x1bc6f, - 0x1bc7d, 0x1bc7f, - 0x1bc89, 0x1bc8f, - 0x1bc9a, 0x1bc9b, - 0x1bca4, 0x1cfff, - 0x1d0f6, 0x1d0ff, - 0x1d127, 0x1d128, - 0x1d1de, 0x1d1ff, - 0x1d246, 0x1d2ff, - 0x1d357, 0x1d35f, - 0x1d372, 0x1d3ff, - 0x1d455, 0x1d455, - 0x1d49d, 0x1d49d, - 0x1d4a0, 0x1d4a1, - 0x1d4a3, 0x1d4a4, - 0x1d4a7, 0x1d4a8, - 0x1d4ad, 0x1d4ad, - 0x1d4ba, 0x1d4ba, - 0x1d4bc, 0x1d4bc, - 0x1d4c4, 0x1d4c4, - 0x1d506, 0x1d506, - 0x1d50b, 0x1d50c, - 0x1d515, 0x1d515, - 0x1d51d, 0x1d51d, - 0x1d53a, 0x1d53a, - 0x1d53f, 0x1d53f, - 0x1d545, 0x1d545, - 0x1d547, 0x1d549, - 0x1d551, 0x1d551, - 0x1d6a6, 0x1d6a7, - 0x1d7cc, 0x1d7cd, - 0x1d800, 0x1e7ff, - 0x1e8c5, 0x1e8c6, - 0x1e8d7, 0x1edff, - 0x1ee04, 0x1ee04, - 0x1ee20, 0x1ee20, - 0x1ee23, 0x1ee23, - 0x1ee25, 0x1ee26, - 0x1ee28, 0x1ee28, - 0x1ee33, 0x1ee33, - 0x1ee38, 0x1ee38, - 0x1ee3a, 0x1ee3a, - 0x1ee3c, 0x1ee41, - 0x1ee43, 0x1ee46, - 0x1ee48, 0x1ee48, - 0x1ee4a, 0x1ee4a, - 0x1ee4c, 0x1ee4c, - 0x1ee50, 0x1ee50, - 0x1ee53, 0x1ee53, - 0x1ee55, 0x1ee56, - 0x1ee58, 0x1ee58, - 0x1ee5a, 0x1ee5a, - 0x1ee5c, 0x1ee5c, - 0x1ee5e, 0x1ee5e, - 0x1ee60, 0x1ee60, - 0x1ee63, 0x1ee63, - 0x1ee65, 0x1ee66, - 0x1ee6b, 0x1ee6b, - 0x1ee73, 0x1ee73, - 0x1ee78, 0x1ee78, - 0x1ee7d, 0x1ee7d, - 0x1ee7f, 0x1ee7f, - 0x1ee8a, 0x1ee8a, - 0x1ee9c, 0x1eea0, - 0x1eea4, 0x1eea4, - 0x1eeaa, 0x1eeaa, - 0x1eebc, 0x1eeef, - 0x1eef2, 0x1efff, - 0x1f02c, 0x1f02f, - 0x1f094, 0x1f09f, - 0x1f0af, 0x1f0b0, - 0x1f0c0, 0x1f0c0, - 0x1f0d0, 0x1f0d0, - 0x1f0f6, 0x1f0ff, - 0x1f10d, 0x1f10f, - 0x1f12f, 0x1f12f, - 0x1f16c, 0x1f16f, - 0x1f19b, 0x1f1e5, - 0x1f203, 0x1f20f, - 0x1f23b, 0x1f23f, - 0x1f249, 0x1f24f, - 0x1f252, 0x1f2ff, - 0x1f32d, 0x1f32f, - 0x1f37e, 0x1f37f, - 0x1f3cf, 0x1f3d3, - 0x1f3f8, 0x1f3ff, - 0x1f4ff, 0x1f4ff, - 0x1f54b, 0x1f54f, - 0x1f57a, 0x1f57a, - 0x1f5a4, 0x1f5a4, - 0x1f643, 0x1f644, - 0x1f6d0, 0x1f6df, - 0x1f6ed, 0x1f6ef, - 0x1f6f4, 0x1f6ff, - 0x1f774, 0x1f77f, - 0x1f7d5, 0x1f7ff, - 0x1f80c, 0x1f80f, - 0x1f848, 0x1f84f, - 0x1f85a, 0x1f85f, - 0x1f888, 0x1f88f, - 0x1f8ae, 0x1ffff, - 0x2a6d7, 0x2a6ff, - 0x2b735, 0x2b73f, - 0x2b81e, 0x2f7ff, - 0x2fa1e, 0xe0000, - 0xe0002, 0xe001f, - 0xe0080, 0xe00ff, - 0xe01f0, 0xeffff, - 0xffffe, 0xfffff, - 0x10fffe, 0x10ffff -}; -UCP_FN(Cn) - -static const unichar ucp_Co_def[] = { - 0xe000, 0xf8ff, - 0xf0000, 0xffffd, - 0x100000, 0x10fffd -}; -UCP_FN(Co) - -static const unichar ucp_Cs_def[] = { - 0xd800, 0xdfff -}; -UCP_FN(Cs) - -static const unichar ucp_L_def[] = { - 0x41, 0x5a, - 0x61, 0x7a, - 0xaa, 0xaa, - 0xb5, 0xb5, - 0xba, 0xba, - 0xc0, 0xd6, - 0xd8, 0xf6, - 0xf8, 0x2c1, - 0x2c6, 0x2d1, - 0x2e0, 0x2e4, - 0x2ec, 0x2ec, - 0x2ee, 0x2ee, - 0x370, 0x374, - 0x376, 0x377, - 0x37a, 0x37d, - 0x37f, 0x37f, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x3a1, - 0x3a3, 0x3f5, - 0x3f7, 0x481, - 0x48a, 0x52f, - 0x531, 0x556, - 0x559, 0x559, - 0x561, 0x587, - 0x5d0, 0x5ea, - 0x5f0, 0x5f2, - 0x620, 0x64a, - 0x66e, 0x66f, - 0x671, 0x6d3, - 0x6d5, 0x6d5, - 0x6e5, 0x6e6, - 0x6ee, 0x6ef, - 0x6fa, 0x6fc, - 0x6ff, 0x6ff, - 0x710, 0x710, - 0x712, 0x72f, - 0x74d, 0x7a5, - 0x7b1, 0x7b1, - 0x7ca, 0x7ea, - 0x7f4, 0x7f5, - 0x7fa, 0x7fa, - 0x800, 0x815, - 0x81a, 0x81a, - 0x824, 0x824, - 0x828, 0x828, - 0x840, 0x858, - 0x8a0, 0x8b2, - 0x904, 0x939, - 0x93d, 0x93d, - 0x950, 0x950, - 0x958, 0x961, - 0x971, 0x980, - 0x985, 0x98c, - 0x98f, 0x990, - 0x993, 0x9a8, - 0x9aa, 0x9b0, - 0x9b2, 0x9b2, - 0x9b6, 0x9b9, - 0x9bd, 0x9bd, - 0x9ce, 0x9ce, - 0x9dc, 0x9dd, - 0x9df, 0x9e1, - 0x9f0, 0x9f1, - 0xa05, 0xa0a, - 0xa0f, 0xa10, - 0xa13, 0xa28, - 0xa2a, 0xa30, - 0xa32, 0xa33, - 0xa35, 0xa36, - 0xa38, 0xa39, - 0xa59, 0xa5c, - 0xa5e, 0xa5e, - 0xa72, 0xa74, - 0xa85, 0xa8d, - 0xa8f, 0xa91, - 0xa93, 0xaa8, - 0xaaa, 0xab0, - 0xab2, 0xab3, - 0xab5, 0xab9, - 0xabd, 0xabd, - 0xad0, 0xad0, - 0xae0, 0xae1, - 0xb05, 0xb0c, - 0xb0f, 0xb10, - 0xb13, 0xb28, - 0xb2a, 0xb30, - 0xb32, 0xb33, - 0xb35, 0xb39, - 0xb3d, 0xb3d, - 0xb5c, 0xb5d, - 0xb5f, 0xb61, - 0xb71, 0xb71, - 0xb83, 0xb83, - 0xb85, 0xb8a, - 0xb8e, 0xb90, - 0xb92, 0xb95, - 0xb99, 0xb9a, - 0xb9c, 0xb9c, - 0xb9e, 0xb9f, - 0xba3, 0xba4, - 0xba8, 0xbaa, - 0xbae, 0xbb9, - 0xbd0, 0xbd0, - 0xc05, 0xc0c, - 0xc0e, 0xc10, - 0xc12, 0xc28, - 0xc2a, 0xc39, - 0xc3d, 0xc3d, - 0xc58, 0xc59, - 0xc60, 0xc61, - 0xc85, 0xc8c, - 0xc8e, 0xc90, - 0xc92, 0xca8, - 0xcaa, 0xcb3, - 0xcb5, 0xcb9, - 0xcbd, 0xcbd, - 0xcde, 0xcde, - 0xce0, 0xce1, - 0xcf1, 0xcf2, - 0xd05, 0xd0c, - 0xd0e, 0xd10, - 0xd12, 0xd3a, - 0xd3d, 0xd3d, - 0xd4e, 0xd4e, - 0xd60, 0xd61, - 0xd7a, 0xd7f, - 0xd85, 0xd96, - 0xd9a, 0xdb1, - 0xdb3, 0xdbb, - 0xdbd, 0xdbd, - 0xdc0, 0xdc6, - 0xe01, 0xe30, - 0xe32, 0xe33, - 0xe40, 0xe46, - 0xe81, 0xe82, - 0xe84, 0xe84, - 0xe87, 0xe88, - 0xe8a, 0xe8a, - 0xe8d, 0xe8d, - 0xe94, 0xe97, - 0xe99, 0xe9f, - 0xea1, 0xea3, - 0xea5, 0xea5, - 0xea7, 0xea7, - 0xeaa, 0xeab, - 0xead, 0xeb0, - 0xeb2, 0xeb3, - 0xebd, 0xebd, - 0xec0, 0xec4, - 0xec6, 0xec6, - 0xedc, 0xedf, - 0xf00, 0xf00, - 0xf40, 0xf47, - 0xf49, 0xf6c, - 0xf88, 0xf8c, - 0x1000, 0x102a, - 0x103f, 0x103f, - 0x1050, 0x1055, - 0x105a, 0x105d, - 0x1061, 0x1061, - 0x1065, 0x1066, - 0x106e, 0x1070, - 0x1075, 0x1081, - 0x108e, 0x108e, - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x10d0, 0x10fa, - 0x10fc, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x167f, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16f1, 0x16f8, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dc, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x18aa, 0x18aa, - 0x18b0, 0x18f5, - 0x1900, 0x191e, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19ab, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x1a20, 0x1a54, - 0x1aa7, 0x1aa7, - 0x1b05, 0x1b33, - 0x1b45, 0x1b4b, - 0x1b83, 0x1ba0, - 0x1bae, 0x1baf, - 0x1bba, 0x1be5, - 0x1c00, 0x1c23, - 0x1c4d, 0x1c4f, - 0x1c5a, 0x1c7d, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf1, - 0x1cf5, 0x1cf6, - 0x1d00, 0x1dbf, - 0x1e00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x209c, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x214e, 0x214e, - 0x2183, 0x2184, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c60, 0x2ce4, - 0x2ceb, 0x2cee, - 0x2cf2, 0x2cf3, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d, - 0x2d30, 0x2d67, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e2f, 0x2e2f, - 0x3005, 0x3006, - 0x3031, 0x3035, - 0x303b, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312d, - 0x3131, 0x318e, - 0x31a0, 0x31ba, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xa000, 0xa48c, - 0xa4d0, 0xa4fd, - 0xa500, 0xa60c, - 0xa610, 0xa61f, - 0xa62a, 0xa62b, - 0xa640, 0xa66e, - 0xa67f, 0xa69d, - 0xa6a0, 0xa6e5, - 0xa717, 0xa71f, - 0xa722, 0xa788, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7f7, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xa840, 0xa873, - 0xa882, 0xa8b3, - 0xa8f2, 0xa8f7, - 0xa8fb, 0xa8fb, - 0xa90a, 0xa925, - 0xa930, 0xa946, - 0xa960, 0xa97c, - 0xa984, 0xa9b2, - 0xa9cf, 0xa9cf, - 0xa9e0, 0xa9e4, - 0xa9e6, 0xa9ef, - 0xa9fa, 0xa9fe, - 0xaa00, 0xaa28, - 0xaa40, 0xaa42, - 0xaa44, 0xaa4b, - 0xaa60, 0xaa76, - 0xaa7a, 0xaa7a, - 0xaa7e, 0xaaaf, - 0xaab1, 0xaab1, - 0xaab5, 0xaab6, - 0xaab9, 0xaabd, - 0xaac0, 0xaac0, - 0xaac2, 0xaac2, - 0xaadb, 0xaadd, - 0xaae0, 0xaaea, - 0xaaf2, 0xaaf4, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e, - 0xab30, 0xab5a, - 0xab5c, 0xab5f, - 0xab64, 0xab65, - 0xabc0, 0xabe2, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10280, 0x1029c, - 0x102a0, 0x102d0, - 0x10300, 0x1031f, - 0x10330, 0x10340, - 0x10342, 0x10349, - 0x10350, 0x10375, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10400, 0x1049d, - 0x10500, 0x10527, - 0x10530, 0x10563, - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x10855, - 0x10860, 0x10876, - 0x10880, 0x1089e, - 0x10900, 0x10915, - 0x10920, 0x10939, - 0x10980, 0x109b7, - 0x109be, 0x109bf, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a60, 0x10a7c, - 0x10a80, 0x10a9c, - 0x10ac0, 0x10ac7, - 0x10ac9, 0x10ae4, - 0x10b00, 0x10b35, - 0x10b40, 0x10b55, - 0x10b60, 0x10b72, - 0x10b80, 0x10b91, - 0x10c00, 0x10c48, - 0x11003, 0x11037, - 0x11083, 0x110af, - 0x110d0, 0x110e8, - 0x11103, 0x11126, - 0x11150, 0x11172, - 0x11176, 0x11176, - 0x11183, 0x111b2, - 0x111c1, 0x111c4, - 0x111da, 0x111da, - 0x11200, 0x11211, - 0x11213, 0x1122b, - 0x112b0, 0x112de, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1133d, 0x1133d, - 0x1135d, 0x11361, - 0x11480, 0x114af, - 0x114c4, 0x114c5, - 0x114c7, 0x114c7, - 0x11580, 0x115ae, - 0x11600, 0x1162f, - 0x11644, 0x11644, - 0x11680, 0x116aa, - 0x118a0, 0x118df, - 0x118ff, 0x118ff, - 0x11ac0, 0x11af8, - 0x12000, 0x12398, - 0x13000, 0x1342e, - 0x16800, 0x16a38, - 0x16a40, 0x16a5e, - 0x16ad0, 0x16aed, - 0x16b00, 0x16b2f, - 0x16b40, 0x16b43, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f, - 0x16f00, 0x16f44, - 0x16f50, 0x16f50, - 0x16f93, 0x16f9f, - 0x1b000, 0x1b001, - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7cb, - 0x1e800, 0x1e8c4, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee24, 0x1ee24, - 0x1ee27, 0x1ee27, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee39, 0x1ee39, - 0x1ee3b, 0x1ee3b, - 0x1ee42, 0x1ee42, - 0x1ee47, 0x1ee47, - 0x1ee49, 0x1ee49, - 0x1ee4b, 0x1ee4b, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee54, 0x1ee54, - 0x1ee57, 0x1ee57, - 0x1ee59, 0x1ee59, - 0x1ee5b, 0x1ee5b, - 0x1ee5d, 0x1ee5d, - 0x1ee5f, 0x1ee5f, - 0x1ee61, 0x1ee62, - 0x1ee64, 0x1ee64, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee7e, 0x1ee7e, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d -}; -UCP_FN(L) - -static const unichar ucp_L_and_def[] = { - 0x41, 0x5a, - 0x61, 0x7a, - 0xb5, 0xb5, - 0xc0, 0xd6, - 0xd8, 0xf6, - 0xf8, 0x1ba, - 0x1bc, 0x1bf, - 0x1c4, 0x293, - 0x295, 0x2af, - 0x370, 0x373, - 0x376, 0x377, - 0x37b, 0x37d, - 0x37f, 0x37f, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x3a1, - 0x3a3, 0x3f5, - 0x3f7, 0x481, - 0x48a, 0x52f, - 0x531, 0x556, - 0x561, 0x587, - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x1d00, 0x1d2b, - 0x1d6b, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x214e, 0x214e, - 0x2183, 0x2184, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c60, 0x2c7b, - 0x2c7e, 0x2ce4, - 0x2ceb, 0x2cee, - 0x2cf2, 0x2cf3, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d, - 0xa640, 0xa66d, - 0xa680, 0xa69b, - 0xa722, 0xa76f, - 0xa771, 0xa787, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7fa, 0xa7fa, - 0xab30, 0xab5a, - 0xab64, 0xab65, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0x10400, 0x1044f, - 0x118a0, 0x118df, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7cb -}; -UCP_FN(L_and) - -static const unichar ucp_Ll_def[] = { - 0x61, 0x7a, - 0xb5, 0xb5, - 0xdf, 0xf6, - 0xf8, 0xff, - 0x101, 0x101, - 0x103, 0x103, - 0x105, 0x105, - 0x107, 0x107, - 0x109, 0x109, - 0x10b, 0x10b, - 0x10d, 0x10d, - 0x10f, 0x10f, - 0x111, 0x111, - 0x113, 0x113, - 0x115, 0x115, - 0x117, 0x117, - 0x119, 0x119, - 0x11b, 0x11b, - 0x11d, 0x11d, - 0x11f, 0x11f, - 0x121, 0x121, - 0x123, 0x123, - 0x125, 0x125, - 0x127, 0x127, - 0x129, 0x129, - 0x12b, 0x12b, - 0x12d, 0x12d, - 0x12f, 0x12f, - 0x131, 0x131, - 0x133, 0x133, - 0x135, 0x135, - 0x137, 0x138, - 0x13a, 0x13a, - 0x13c, 0x13c, - 0x13e, 0x13e, - 0x140, 0x140, - 0x142, 0x142, - 0x144, 0x144, - 0x146, 0x146, - 0x148, 0x149, - 0x14b, 0x14b, - 0x14d, 0x14d, - 0x14f, 0x14f, - 0x151, 0x151, - 0x153, 0x153, - 0x155, 0x155, - 0x157, 0x157, - 0x159, 0x159, - 0x15b, 0x15b, - 0x15d, 0x15d, - 0x15f, 0x15f, - 0x161, 0x161, - 0x163, 0x163, - 0x165, 0x165, - 0x167, 0x167, - 0x169, 0x169, - 0x16b, 0x16b, - 0x16d, 0x16d, - 0x16f, 0x16f, - 0x171, 0x171, - 0x173, 0x173, - 0x175, 0x175, - 0x177, 0x177, - 0x17a, 0x17a, - 0x17c, 0x17c, - 0x17e, 0x180, - 0x183, 0x183, - 0x185, 0x185, - 0x188, 0x188, - 0x18c, 0x18d, - 0x192, 0x192, - 0x195, 0x195, - 0x199, 0x19b, - 0x19e, 0x19e, - 0x1a1, 0x1a1, - 0x1a3, 0x1a3, - 0x1a5, 0x1a5, - 0x1a8, 0x1a8, - 0x1aa, 0x1ab, - 0x1ad, 0x1ad, - 0x1b0, 0x1b0, - 0x1b4, 0x1b4, - 0x1b6, 0x1b6, - 0x1b9, 0x1ba, - 0x1bd, 0x1bf, - 0x1c6, 0x1c6, - 0x1c9, 0x1c9, - 0x1cc, 0x1cc, - 0x1ce, 0x1ce, - 0x1d0, 0x1d0, - 0x1d2, 0x1d2, - 0x1d4, 0x1d4, - 0x1d6, 0x1d6, - 0x1d8, 0x1d8, - 0x1da, 0x1da, - 0x1dc, 0x1dd, - 0x1df, 0x1df, - 0x1e1, 0x1e1, - 0x1e3, 0x1e3, - 0x1e5, 0x1e5, - 0x1e7, 0x1e7, - 0x1e9, 0x1e9, - 0x1eb, 0x1eb, - 0x1ed, 0x1ed, - 0x1ef, 0x1f0, - 0x1f3, 0x1f3, - 0x1f5, 0x1f5, - 0x1f9, 0x1f9, - 0x1fb, 0x1fb, - 0x1fd, 0x1fd, - 0x1ff, 0x1ff, - 0x201, 0x201, - 0x203, 0x203, - 0x205, 0x205, - 0x207, 0x207, - 0x209, 0x209, - 0x20b, 0x20b, - 0x20d, 0x20d, - 0x20f, 0x20f, - 0x211, 0x211, - 0x213, 0x213, - 0x215, 0x215, - 0x217, 0x217, - 0x219, 0x219, - 0x21b, 0x21b, - 0x21d, 0x21d, - 0x21f, 0x21f, - 0x221, 0x221, - 0x223, 0x223, - 0x225, 0x225, - 0x227, 0x227, - 0x229, 0x229, - 0x22b, 0x22b, - 0x22d, 0x22d, - 0x22f, 0x22f, - 0x231, 0x231, - 0x233, 0x239, - 0x23c, 0x23c, - 0x23f, 0x240, - 0x242, 0x242, - 0x247, 0x247, - 0x249, 0x249, - 0x24b, 0x24b, - 0x24d, 0x24d, - 0x24f, 0x293, - 0x295, 0x2af, - 0x371, 0x371, - 0x373, 0x373, - 0x377, 0x377, - 0x37b, 0x37d, - 0x390, 0x390, - 0x3ac, 0x3ce, - 0x3d0, 0x3d1, - 0x3d5, 0x3d7, - 0x3d9, 0x3d9, - 0x3db, 0x3db, - 0x3dd, 0x3dd, - 0x3df, 0x3df, - 0x3e1, 0x3e1, - 0x3e3, 0x3e3, - 0x3e5, 0x3e5, - 0x3e7, 0x3e7, - 0x3e9, 0x3e9, - 0x3eb, 0x3eb, - 0x3ed, 0x3ed, - 0x3ef, 0x3f3, - 0x3f5, 0x3f5, - 0x3f8, 0x3f8, - 0x3fb, 0x3fc, - 0x430, 0x45f, - 0x461, 0x461, - 0x463, 0x463, - 0x465, 0x465, - 0x467, 0x467, - 0x469, 0x469, - 0x46b, 0x46b, - 0x46d, 0x46d, - 0x46f, 0x46f, - 0x471, 0x471, - 0x473, 0x473, - 0x475, 0x475, - 0x477, 0x477, - 0x479, 0x479, - 0x47b, 0x47b, - 0x47d, 0x47d, - 0x47f, 0x47f, - 0x481, 0x481, - 0x48b, 0x48b, - 0x48d, 0x48d, - 0x48f, 0x48f, - 0x491, 0x491, - 0x493, 0x493, - 0x495, 0x495, - 0x497, 0x497, - 0x499, 0x499, - 0x49b, 0x49b, - 0x49d, 0x49d, - 0x49f, 0x49f, - 0x4a1, 0x4a1, - 0x4a3, 0x4a3, - 0x4a5, 0x4a5, - 0x4a7, 0x4a7, - 0x4a9, 0x4a9, - 0x4ab, 0x4ab, - 0x4ad, 0x4ad, - 0x4af, 0x4af, - 0x4b1, 0x4b1, - 0x4b3, 0x4b3, - 0x4b5, 0x4b5, - 0x4b7, 0x4b7, - 0x4b9, 0x4b9, - 0x4bb, 0x4bb, - 0x4bd, 0x4bd, - 0x4bf, 0x4bf, - 0x4c2, 0x4c2, - 0x4c4, 0x4c4, - 0x4c6, 0x4c6, - 0x4c8, 0x4c8, - 0x4ca, 0x4ca, - 0x4cc, 0x4cc, - 0x4ce, 0x4cf, - 0x4d1, 0x4d1, - 0x4d3, 0x4d3, - 0x4d5, 0x4d5, - 0x4d7, 0x4d7, - 0x4d9, 0x4d9, - 0x4db, 0x4db, - 0x4dd, 0x4dd, - 0x4df, 0x4df, - 0x4e1, 0x4e1, - 0x4e3, 0x4e3, - 0x4e5, 0x4e5, - 0x4e7, 0x4e7, - 0x4e9, 0x4e9, - 0x4eb, 0x4eb, - 0x4ed, 0x4ed, - 0x4ef, 0x4ef, - 0x4f1, 0x4f1, - 0x4f3, 0x4f3, - 0x4f5, 0x4f5, - 0x4f7, 0x4f7, - 0x4f9, 0x4f9, - 0x4fb, 0x4fb, - 0x4fd, 0x4fd, - 0x4ff, 0x4ff, - 0x501, 0x501, - 0x503, 0x503, - 0x505, 0x505, - 0x507, 0x507, - 0x509, 0x509, - 0x50b, 0x50b, - 0x50d, 0x50d, - 0x50f, 0x50f, - 0x511, 0x511, - 0x513, 0x513, - 0x515, 0x515, - 0x517, 0x517, - 0x519, 0x519, - 0x51b, 0x51b, - 0x51d, 0x51d, - 0x51f, 0x51f, - 0x521, 0x521, - 0x523, 0x523, - 0x525, 0x525, - 0x527, 0x527, - 0x529, 0x529, - 0x52b, 0x52b, - 0x52d, 0x52d, - 0x52f, 0x52f, - 0x561, 0x587, - 0x1d00, 0x1d2b, - 0x1d6b, 0x1d77, - 0x1d79, 0x1d9a, - 0x1e01, 0x1e01, - 0x1e03, 0x1e03, - 0x1e05, 0x1e05, - 0x1e07, 0x1e07, - 0x1e09, 0x1e09, - 0x1e0b, 0x1e0b, - 0x1e0d, 0x1e0d, - 0x1e0f, 0x1e0f, - 0x1e11, 0x1e11, - 0x1e13, 0x1e13, - 0x1e15, 0x1e15, - 0x1e17, 0x1e17, - 0x1e19, 0x1e19, - 0x1e1b, 0x1e1b, - 0x1e1d, 0x1e1d, - 0x1e1f, 0x1e1f, - 0x1e21, 0x1e21, - 0x1e23, 0x1e23, - 0x1e25, 0x1e25, - 0x1e27, 0x1e27, - 0x1e29, 0x1e29, - 0x1e2b, 0x1e2b, - 0x1e2d, 0x1e2d, - 0x1e2f, 0x1e2f, - 0x1e31, 0x1e31, - 0x1e33, 0x1e33, - 0x1e35, 0x1e35, - 0x1e37, 0x1e37, - 0x1e39, 0x1e39, - 0x1e3b, 0x1e3b, - 0x1e3d, 0x1e3d, - 0x1e3f, 0x1e3f, - 0x1e41, 0x1e41, - 0x1e43, 0x1e43, - 0x1e45, 0x1e45, - 0x1e47, 0x1e47, - 0x1e49, 0x1e49, - 0x1e4b, 0x1e4b, - 0x1e4d, 0x1e4d, - 0x1e4f, 0x1e4f, - 0x1e51, 0x1e51, - 0x1e53, 0x1e53, - 0x1e55, 0x1e55, - 0x1e57, 0x1e57, - 0x1e59, 0x1e59, - 0x1e5b, 0x1e5b, - 0x1e5d, 0x1e5d, - 0x1e5f, 0x1e5f, - 0x1e61, 0x1e61, - 0x1e63, 0x1e63, - 0x1e65, 0x1e65, - 0x1e67, 0x1e67, - 0x1e69, 0x1e69, - 0x1e6b, 0x1e6b, - 0x1e6d, 0x1e6d, - 0x1e6f, 0x1e6f, - 0x1e71, 0x1e71, - 0x1e73, 0x1e73, - 0x1e75, 0x1e75, - 0x1e77, 0x1e77, - 0x1e79, 0x1e79, - 0x1e7b, 0x1e7b, - 0x1e7d, 0x1e7d, - 0x1e7f, 0x1e7f, - 0x1e81, 0x1e81, - 0x1e83, 0x1e83, - 0x1e85, 0x1e85, - 0x1e87, 0x1e87, - 0x1e89, 0x1e89, - 0x1e8b, 0x1e8b, - 0x1e8d, 0x1e8d, - 0x1e8f, 0x1e8f, - 0x1e91, 0x1e91, - 0x1e93, 0x1e93, - 0x1e95, 0x1e9d, - 0x1e9f, 0x1e9f, - 0x1ea1, 0x1ea1, - 0x1ea3, 0x1ea3, - 0x1ea5, 0x1ea5, - 0x1ea7, 0x1ea7, - 0x1ea9, 0x1ea9, - 0x1eab, 0x1eab, - 0x1ead, 0x1ead, - 0x1eaf, 0x1eaf, - 0x1eb1, 0x1eb1, - 0x1eb3, 0x1eb3, - 0x1eb5, 0x1eb5, - 0x1eb7, 0x1eb7, - 0x1eb9, 0x1eb9, - 0x1ebb, 0x1ebb, - 0x1ebd, 0x1ebd, - 0x1ebf, 0x1ebf, - 0x1ec1, 0x1ec1, - 0x1ec3, 0x1ec3, - 0x1ec5, 0x1ec5, - 0x1ec7, 0x1ec7, - 0x1ec9, 0x1ec9, - 0x1ecb, 0x1ecb, - 0x1ecd, 0x1ecd, - 0x1ecf, 0x1ecf, - 0x1ed1, 0x1ed1, - 0x1ed3, 0x1ed3, - 0x1ed5, 0x1ed5, - 0x1ed7, 0x1ed7, - 0x1ed9, 0x1ed9, - 0x1edb, 0x1edb, - 0x1edd, 0x1edd, - 0x1edf, 0x1edf, - 0x1ee1, 0x1ee1, - 0x1ee3, 0x1ee3, - 0x1ee5, 0x1ee5, - 0x1ee7, 0x1ee7, - 0x1ee9, 0x1ee9, - 0x1eeb, 0x1eeb, - 0x1eed, 0x1eed, - 0x1eef, 0x1eef, - 0x1ef1, 0x1ef1, - 0x1ef3, 0x1ef3, - 0x1ef5, 0x1ef5, - 0x1ef7, 0x1ef7, - 0x1ef9, 0x1ef9, - 0x1efb, 0x1efb, - 0x1efd, 0x1efd, - 0x1eff, 0x1f07, - 0x1f10, 0x1f15, - 0x1f20, 0x1f27, - 0x1f30, 0x1f37, - 0x1f40, 0x1f45, - 0x1f50, 0x1f57, - 0x1f60, 0x1f67, - 0x1f70, 0x1f7d, - 0x1f80, 0x1f87, - 0x1f90, 0x1f97, - 0x1fa0, 0x1fa7, - 0x1fb0, 0x1fb4, - 0x1fb6, 0x1fb7, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fc7, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fd7, - 0x1fe0, 0x1fe7, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ff7, - 0x210a, 0x210a, - 0x210e, 0x210f, - 0x2113, 0x2113, - 0x212f, 0x212f, - 0x2134, 0x2134, - 0x2139, 0x2139, - 0x213c, 0x213d, - 0x2146, 0x2149, - 0x214e, 0x214e, - 0x2184, 0x2184, - 0x2c30, 0x2c5e, - 0x2c61, 0x2c61, - 0x2c65, 0x2c66, - 0x2c68, 0x2c68, - 0x2c6a, 0x2c6a, - 0x2c6c, 0x2c6c, - 0x2c71, 0x2c71, - 0x2c73, 0x2c74, - 0x2c76, 0x2c7b, - 0x2c81, 0x2c81, - 0x2c83, 0x2c83, - 0x2c85, 0x2c85, - 0x2c87, 0x2c87, - 0x2c89, 0x2c89, - 0x2c8b, 0x2c8b, - 0x2c8d, 0x2c8d, - 0x2c8f, 0x2c8f, - 0x2c91, 0x2c91, - 0x2c93, 0x2c93, - 0x2c95, 0x2c95, - 0x2c97, 0x2c97, - 0x2c99, 0x2c99, - 0x2c9b, 0x2c9b, - 0x2c9d, 0x2c9d, - 0x2c9f, 0x2c9f, - 0x2ca1, 0x2ca1, - 0x2ca3, 0x2ca3, - 0x2ca5, 0x2ca5, - 0x2ca7, 0x2ca7, - 0x2ca9, 0x2ca9, - 0x2cab, 0x2cab, - 0x2cad, 0x2cad, - 0x2caf, 0x2caf, - 0x2cb1, 0x2cb1, - 0x2cb3, 0x2cb3, - 0x2cb5, 0x2cb5, - 0x2cb7, 0x2cb7, - 0x2cb9, 0x2cb9, - 0x2cbb, 0x2cbb, - 0x2cbd, 0x2cbd, - 0x2cbf, 0x2cbf, - 0x2cc1, 0x2cc1, - 0x2cc3, 0x2cc3, - 0x2cc5, 0x2cc5, - 0x2cc7, 0x2cc7, - 0x2cc9, 0x2cc9, - 0x2ccb, 0x2ccb, - 0x2ccd, 0x2ccd, - 0x2ccf, 0x2ccf, - 0x2cd1, 0x2cd1, - 0x2cd3, 0x2cd3, - 0x2cd5, 0x2cd5, - 0x2cd7, 0x2cd7, - 0x2cd9, 0x2cd9, - 0x2cdb, 0x2cdb, - 0x2cdd, 0x2cdd, - 0x2cdf, 0x2cdf, - 0x2ce1, 0x2ce1, - 0x2ce3, 0x2ce4, - 0x2cec, 0x2cec, - 0x2cee, 0x2cee, - 0x2cf3, 0x2cf3, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d, - 0xa641, 0xa641, - 0xa643, 0xa643, - 0xa645, 0xa645, - 0xa647, 0xa647, - 0xa649, 0xa649, - 0xa64b, 0xa64b, - 0xa64d, 0xa64d, - 0xa64f, 0xa64f, - 0xa651, 0xa651, - 0xa653, 0xa653, - 0xa655, 0xa655, - 0xa657, 0xa657, - 0xa659, 0xa659, - 0xa65b, 0xa65b, - 0xa65d, 0xa65d, - 0xa65f, 0xa65f, - 0xa661, 0xa661, - 0xa663, 0xa663, - 0xa665, 0xa665, - 0xa667, 0xa667, - 0xa669, 0xa669, - 0xa66b, 0xa66b, - 0xa66d, 0xa66d, - 0xa681, 0xa681, - 0xa683, 0xa683, - 0xa685, 0xa685, - 0xa687, 0xa687, - 0xa689, 0xa689, - 0xa68b, 0xa68b, - 0xa68d, 0xa68d, - 0xa68f, 0xa68f, - 0xa691, 0xa691, - 0xa693, 0xa693, - 0xa695, 0xa695, - 0xa697, 0xa697, - 0xa699, 0xa699, - 0xa69b, 0xa69b, - 0xa723, 0xa723, - 0xa725, 0xa725, - 0xa727, 0xa727, - 0xa729, 0xa729, - 0xa72b, 0xa72b, - 0xa72d, 0xa72d, - 0xa72f, 0xa731, - 0xa733, 0xa733, - 0xa735, 0xa735, - 0xa737, 0xa737, - 0xa739, 0xa739, - 0xa73b, 0xa73b, - 0xa73d, 0xa73d, - 0xa73f, 0xa73f, - 0xa741, 0xa741, - 0xa743, 0xa743, - 0xa745, 0xa745, - 0xa747, 0xa747, - 0xa749, 0xa749, - 0xa74b, 0xa74b, - 0xa74d, 0xa74d, - 0xa74f, 0xa74f, - 0xa751, 0xa751, - 0xa753, 0xa753, - 0xa755, 0xa755, - 0xa757, 0xa757, - 0xa759, 0xa759, - 0xa75b, 0xa75b, - 0xa75d, 0xa75d, - 0xa75f, 0xa75f, - 0xa761, 0xa761, - 0xa763, 0xa763, - 0xa765, 0xa765, - 0xa767, 0xa767, - 0xa769, 0xa769, - 0xa76b, 0xa76b, - 0xa76d, 0xa76d, - 0xa76f, 0xa76f, - 0xa771, 0xa778, - 0xa77a, 0xa77a, - 0xa77c, 0xa77c, - 0xa77f, 0xa77f, - 0xa781, 0xa781, - 0xa783, 0xa783, - 0xa785, 0xa785, - 0xa787, 0xa787, - 0xa78c, 0xa78c, - 0xa78e, 0xa78e, - 0xa791, 0xa791, - 0xa793, 0xa795, - 0xa797, 0xa797, - 0xa799, 0xa799, - 0xa79b, 0xa79b, - 0xa79d, 0xa79d, - 0xa79f, 0xa79f, - 0xa7a1, 0xa7a1, - 0xa7a3, 0xa7a3, - 0xa7a5, 0xa7a5, - 0xa7a7, 0xa7a7, - 0xa7a9, 0xa7a9, - 0xa7fa, 0xa7fa, - 0xab30, 0xab5a, - 0xab64, 0xab65, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xff41, 0xff5a, - 0x10428, 0x1044f, - 0x118c0, 0x118df, - 0x1d41a, 0x1d433, - 0x1d44e, 0x1d454, - 0x1d456, 0x1d467, - 0x1d482, 0x1d49b, - 0x1d4b6, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d4cf, - 0x1d4ea, 0x1d503, - 0x1d51e, 0x1d537, - 0x1d552, 0x1d56b, - 0x1d586, 0x1d59f, - 0x1d5ba, 0x1d5d3, - 0x1d5ee, 0x1d607, - 0x1d622, 0x1d63b, - 0x1d656, 0x1d66f, - 0x1d68a, 0x1d6a5, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6e1, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d71b, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d755, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d78f, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7c9, - 0x1d7cb, 0x1d7cb -}; -UCP_FN(Ll) - -static const unichar ucp_Lm_def[] = { - 0x2b0, 0x2c1, - 0x2c6, 0x2d1, - 0x2e0, 0x2e4, - 0x2ec, 0x2ec, - 0x2ee, 0x2ee, - 0x374, 0x374, - 0x37a, 0x37a, - 0x559, 0x559, - 0x640, 0x640, - 0x6e5, 0x6e6, - 0x7f4, 0x7f5, - 0x7fa, 0x7fa, - 0x81a, 0x81a, - 0x824, 0x824, - 0x828, 0x828, - 0x971, 0x971, - 0xe46, 0xe46, - 0xec6, 0xec6, - 0x10fc, 0x10fc, - 0x17d7, 0x17d7, - 0x1843, 0x1843, - 0x1aa7, 0x1aa7, - 0x1c78, 0x1c7d, - 0x1d2c, 0x1d6a, - 0x1d78, 0x1d78, - 0x1d9b, 0x1dbf, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x209c, - 0x2c7c, 0x2c7d, - 0x2d6f, 0x2d6f, - 0x2e2f, 0x2e2f, - 0x3005, 0x3005, - 0x3031, 0x3035, - 0x303b, 0x303b, - 0x309d, 0x309e, - 0x30fc, 0x30fe, - 0xa015, 0xa015, - 0xa4f8, 0xa4fd, - 0xa60c, 0xa60c, - 0xa67f, 0xa67f, - 0xa69c, 0xa69d, - 0xa717, 0xa71f, - 0xa770, 0xa770, - 0xa788, 0xa788, - 0xa7f8, 0xa7f9, - 0xa9cf, 0xa9cf, - 0xa9e6, 0xa9e6, - 0xaa70, 0xaa70, - 0xaadd, 0xaadd, - 0xaaf3, 0xaaf4, - 0xab5c, 0xab5f, - 0xff70, 0xff70, - 0xff9e, 0xff9f, - 0x16b40, 0x16b43, - 0x16f93, 0x16f9f -}; -UCP_FN(Lm) - -static const unichar ucp_Lo_def[] = { - 0xaa, 0xaa, - 0xba, 0xba, - 0x1bb, 0x1bb, - 0x1c0, 0x1c3, - 0x294, 0x294, - 0x5d0, 0x5ea, - 0x5f0, 0x5f2, - 0x620, 0x63f, - 0x641, 0x64a, - 0x66e, 0x66f, - 0x671, 0x6d3, - 0x6d5, 0x6d5, - 0x6ee, 0x6ef, - 0x6fa, 0x6fc, - 0x6ff, 0x6ff, - 0x710, 0x710, - 0x712, 0x72f, - 0x74d, 0x7a5, - 0x7b1, 0x7b1, - 0x7ca, 0x7ea, - 0x800, 0x815, - 0x840, 0x858, - 0x8a0, 0x8b2, - 0x904, 0x939, - 0x93d, 0x93d, - 0x950, 0x950, - 0x958, 0x961, - 0x972, 0x980, - 0x985, 0x98c, - 0x98f, 0x990, - 0x993, 0x9a8, - 0x9aa, 0x9b0, - 0x9b2, 0x9b2, - 0x9b6, 0x9b9, - 0x9bd, 0x9bd, - 0x9ce, 0x9ce, - 0x9dc, 0x9dd, - 0x9df, 0x9e1, - 0x9f0, 0x9f1, - 0xa05, 0xa0a, - 0xa0f, 0xa10, - 0xa13, 0xa28, - 0xa2a, 0xa30, - 0xa32, 0xa33, - 0xa35, 0xa36, - 0xa38, 0xa39, - 0xa59, 0xa5c, - 0xa5e, 0xa5e, - 0xa72, 0xa74, - 0xa85, 0xa8d, - 0xa8f, 0xa91, - 0xa93, 0xaa8, - 0xaaa, 0xab0, - 0xab2, 0xab3, - 0xab5, 0xab9, - 0xabd, 0xabd, - 0xad0, 0xad0, - 0xae0, 0xae1, - 0xb05, 0xb0c, - 0xb0f, 0xb10, - 0xb13, 0xb28, - 0xb2a, 0xb30, - 0xb32, 0xb33, - 0xb35, 0xb39, - 0xb3d, 0xb3d, - 0xb5c, 0xb5d, - 0xb5f, 0xb61, - 0xb71, 0xb71, - 0xb83, 0xb83, - 0xb85, 0xb8a, - 0xb8e, 0xb90, - 0xb92, 0xb95, - 0xb99, 0xb9a, - 0xb9c, 0xb9c, - 0xb9e, 0xb9f, - 0xba3, 0xba4, - 0xba8, 0xbaa, - 0xbae, 0xbb9, - 0xbd0, 0xbd0, - 0xc05, 0xc0c, - 0xc0e, 0xc10, - 0xc12, 0xc28, - 0xc2a, 0xc39, - 0xc3d, 0xc3d, - 0xc58, 0xc59, - 0xc60, 0xc61, - 0xc85, 0xc8c, - 0xc8e, 0xc90, - 0xc92, 0xca8, - 0xcaa, 0xcb3, - 0xcb5, 0xcb9, - 0xcbd, 0xcbd, - 0xcde, 0xcde, - 0xce0, 0xce1, - 0xcf1, 0xcf2, - 0xd05, 0xd0c, - 0xd0e, 0xd10, - 0xd12, 0xd3a, - 0xd3d, 0xd3d, - 0xd4e, 0xd4e, - 0xd60, 0xd61, - 0xd7a, 0xd7f, - 0xd85, 0xd96, - 0xd9a, 0xdb1, - 0xdb3, 0xdbb, - 0xdbd, 0xdbd, - 0xdc0, 0xdc6, - 0xe01, 0xe30, - 0xe32, 0xe33, - 0xe40, 0xe45, - 0xe81, 0xe82, - 0xe84, 0xe84, - 0xe87, 0xe88, - 0xe8a, 0xe8a, - 0xe8d, 0xe8d, - 0xe94, 0xe97, - 0xe99, 0xe9f, - 0xea1, 0xea3, - 0xea5, 0xea5, - 0xea7, 0xea7, - 0xeaa, 0xeab, - 0xead, 0xeb0, - 0xeb2, 0xeb3, - 0xebd, 0xebd, - 0xec0, 0xec4, - 0xedc, 0xedf, - 0xf00, 0xf00, - 0xf40, 0xf47, - 0xf49, 0xf6c, - 0xf88, 0xf8c, - 0x1000, 0x102a, - 0x103f, 0x103f, - 0x1050, 0x1055, - 0x105a, 0x105d, - 0x1061, 0x1061, - 0x1065, 0x1066, - 0x106e, 0x1070, - 0x1075, 0x1081, - 0x108e, 0x108e, - 0x10d0, 0x10fa, - 0x10fd, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x167f, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16f1, 0x16f8, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17dc, 0x17dc, - 0x1820, 0x1842, - 0x1844, 0x1877, - 0x1880, 0x18a8, - 0x18aa, 0x18aa, - 0x18b0, 0x18f5, - 0x1900, 0x191e, - 0x1950, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19ab, - 0x19c1, 0x19c7, - 0x1a00, 0x1a16, - 0x1a20, 0x1a54, - 0x1b05, 0x1b33, - 0x1b45, 0x1b4b, - 0x1b83, 0x1ba0, - 0x1bae, 0x1baf, - 0x1bba, 0x1be5, - 0x1c00, 0x1c23, - 0x1c4d, 0x1c4f, - 0x1c5a, 0x1c77, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf1, - 0x1cf5, 0x1cf6, - 0x2135, 0x2138, - 0x2d30, 0x2d67, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x3006, 0x3006, - 0x303c, 0x303c, - 0x3041, 0x3096, - 0x309f, 0x309f, - 0x30a1, 0x30fa, - 0x30ff, 0x30ff, - 0x3105, 0x312d, - 0x3131, 0x318e, - 0x31a0, 0x31ba, - 0x31f0, 0x31ff, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xa000, 0xa014, - 0xa016, 0xa48c, - 0xa4d0, 0xa4f7, - 0xa500, 0xa60b, - 0xa610, 0xa61f, - 0xa62a, 0xa62b, - 0xa66e, 0xa66e, - 0xa6a0, 0xa6e5, - 0xa7f7, 0xa7f7, - 0xa7fb, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xa840, 0xa873, - 0xa882, 0xa8b3, - 0xa8f2, 0xa8f7, - 0xa8fb, 0xa8fb, - 0xa90a, 0xa925, - 0xa930, 0xa946, - 0xa960, 0xa97c, - 0xa984, 0xa9b2, - 0xa9e0, 0xa9e4, - 0xa9e7, 0xa9ef, - 0xa9fa, 0xa9fe, - 0xaa00, 0xaa28, - 0xaa40, 0xaa42, - 0xaa44, 0xaa4b, - 0xaa60, 0xaa6f, - 0xaa71, 0xaa76, - 0xaa7a, 0xaa7a, - 0xaa7e, 0xaaaf, - 0xaab1, 0xaab1, - 0xaab5, 0xaab6, - 0xaab9, 0xaabd, - 0xaac0, 0xaac0, - 0xaac2, 0xaac2, - 0xaadb, 0xaadc, - 0xaae0, 0xaaea, - 0xaaf2, 0xaaf2, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e, - 0xabc0, 0xabe2, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff66, 0xff6f, - 0xff71, 0xff9d, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10280, 0x1029c, - 0x102a0, 0x102d0, - 0x10300, 0x1031f, - 0x10330, 0x10340, - 0x10342, 0x10349, - 0x10350, 0x10375, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x10450, 0x1049d, - 0x10500, 0x10527, - 0x10530, 0x10563, - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x10855, - 0x10860, 0x10876, - 0x10880, 0x1089e, - 0x10900, 0x10915, - 0x10920, 0x10939, - 0x10980, 0x109b7, - 0x109be, 0x109bf, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a60, 0x10a7c, - 0x10a80, 0x10a9c, - 0x10ac0, 0x10ac7, - 0x10ac9, 0x10ae4, - 0x10b00, 0x10b35, - 0x10b40, 0x10b55, - 0x10b60, 0x10b72, - 0x10b80, 0x10b91, - 0x10c00, 0x10c48, - 0x11003, 0x11037, - 0x11083, 0x110af, - 0x110d0, 0x110e8, - 0x11103, 0x11126, - 0x11150, 0x11172, - 0x11176, 0x11176, - 0x11183, 0x111b2, - 0x111c1, 0x111c4, - 0x111da, 0x111da, - 0x11200, 0x11211, - 0x11213, 0x1122b, - 0x112b0, 0x112de, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1133d, 0x1133d, - 0x1135d, 0x11361, - 0x11480, 0x114af, - 0x114c4, 0x114c5, - 0x114c7, 0x114c7, - 0x11580, 0x115ae, - 0x11600, 0x1162f, - 0x11644, 0x11644, - 0x11680, 0x116aa, - 0x118ff, 0x118ff, - 0x11ac0, 0x11af8, - 0x12000, 0x12398, - 0x13000, 0x1342e, - 0x16800, 0x16a38, - 0x16a40, 0x16a5e, - 0x16ad0, 0x16aed, - 0x16b00, 0x16b2f, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f, - 0x16f00, 0x16f44, - 0x16f50, 0x16f50, - 0x1b000, 0x1b001, - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1e800, 0x1e8c4, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee24, 0x1ee24, - 0x1ee27, 0x1ee27, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee39, 0x1ee39, - 0x1ee3b, 0x1ee3b, - 0x1ee42, 0x1ee42, - 0x1ee47, 0x1ee47, - 0x1ee49, 0x1ee49, - 0x1ee4b, 0x1ee4b, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee54, 0x1ee54, - 0x1ee57, 0x1ee57, - 0x1ee59, 0x1ee59, - 0x1ee5b, 0x1ee5b, - 0x1ee5d, 0x1ee5d, - 0x1ee5f, 0x1ee5f, - 0x1ee61, 0x1ee62, - 0x1ee64, 0x1ee64, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee7e, 0x1ee7e, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d -}; -UCP_FN(Lo) - -static const unichar ucp_Lt_def[] = { - 0x1c5, 0x1c5, - 0x1c8, 0x1c8, - 0x1cb, 0x1cb, - 0x1f2, 0x1f2, - 0x1f88, 0x1f8f, - 0x1f98, 0x1f9f, - 0x1fa8, 0x1faf, - 0x1fbc, 0x1fbc, - 0x1fcc, 0x1fcc, - 0x1ffc, 0x1ffc -}; -UCP_FN(Lt) - -static const unichar ucp_Lu_def[] = { - 0x41, 0x5a, - 0xc0, 0xd6, - 0xd8, 0xde, - 0x100, 0x100, - 0x102, 0x102, - 0x104, 0x104, - 0x106, 0x106, - 0x108, 0x108, - 0x10a, 0x10a, - 0x10c, 0x10c, - 0x10e, 0x10e, - 0x110, 0x110, - 0x112, 0x112, - 0x114, 0x114, - 0x116, 0x116, - 0x118, 0x118, - 0x11a, 0x11a, - 0x11c, 0x11c, - 0x11e, 0x11e, - 0x120, 0x120, - 0x122, 0x122, - 0x124, 0x124, - 0x126, 0x126, - 0x128, 0x128, - 0x12a, 0x12a, - 0x12c, 0x12c, - 0x12e, 0x12e, - 0x130, 0x130, - 0x132, 0x132, - 0x134, 0x134, - 0x136, 0x136, - 0x139, 0x139, - 0x13b, 0x13b, - 0x13d, 0x13d, - 0x13f, 0x13f, - 0x141, 0x141, - 0x143, 0x143, - 0x145, 0x145, - 0x147, 0x147, - 0x14a, 0x14a, - 0x14c, 0x14c, - 0x14e, 0x14e, - 0x150, 0x150, - 0x152, 0x152, - 0x154, 0x154, - 0x156, 0x156, - 0x158, 0x158, - 0x15a, 0x15a, - 0x15c, 0x15c, - 0x15e, 0x15e, - 0x160, 0x160, - 0x162, 0x162, - 0x164, 0x164, - 0x166, 0x166, - 0x168, 0x168, - 0x16a, 0x16a, - 0x16c, 0x16c, - 0x16e, 0x16e, - 0x170, 0x170, - 0x172, 0x172, - 0x174, 0x174, - 0x176, 0x176, - 0x178, 0x179, - 0x17b, 0x17b, - 0x17d, 0x17d, - 0x181, 0x182, - 0x184, 0x184, - 0x186, 0x187, - 0x189, 0x18b, - 0x18e, 0x191, - 0x193, 0x194, - 0x196, 0x198, - 0x19c, 0x19d, - 0x19f, 0x1a0, - 0x1a2, 0x1a2, - 0x1a4, 0x1a4, - 0x1a6, 0x1a7, - 0x1a9, 0x1a9, - 0x1ac, 0x1ac, - 0x1ae, 0x1af, - 0x1b1, 0x1b3, - 0x1b5, 0x1b5, - 0x1b7, 0x1b8, - 0x1bc, 0x1bc, - 0x1c4, 0x1c4, - 0x1c7, 0x1c7, - 0x1ca, 0x1ca, - 0x1cd, 0x1cd, - 0x1cf, 0x1cf, - 0x1d1, 0x1d1, - 0x1d3, 0x1d3, - 0x1d5, 0x1d5, - 0x1d7, 0x1d7, - 0x1d9, 0x1d9, - 0x1db, 0x1db, - 0x1de, 0x1de, - 0x1e0, 0x1e0, - 0x1e2, 0x1e2, - 0x1e4, 0x1e4, - 0x1e6, 0x1e6, - 0x1e8, 0x1e8, - 0x1ea, 0x1ea, - 0x1ec, 0x1ec, - 0x1ee, 0x1ee, - 0x1f1, 0x1f1, - 0x1f4, 0x1f4, - 0x1f6, 0x1f8, - 0x1fa, 0x1fa, - 0x1fc, 0x1fc, - 0x1fe, 0x1fe, - 0x200, 0x200, - 0x202, 0x202, - 0x204, 0x204, - 0x206, 0x206, - 0x208, 0x208, - 0x20a, 0x20a, - 0x20c, 0x20c, - 0x20e, 0x20e, - 0x210, 0x210, - 0x212, 0x212, - 0x214, 0x214, - 0x216, 0x216, - 0x218, 0x218, - 0x21a, 0x21a, - 0x21c, 0x21c, - 0x21e, 0x21e, - 0x220, 0x220, - 0x222, 0x222, - 0x224, 0x224, - 0x226, 0x226, - 0x228, 0x228, - 0x22a, 0x22a, - 0x22c, 0x22c, - 0x22e, 0x22e, - 0x230, 0x230, - 0x232, 0x232, - 0x23a, 0x23b, - 0x23d, 0x23e, - 0x241, 0x241, - 0x243, 0x246, - 0x248, 0x248, - 0x24a, 0x24a, - 0x24c, 0x24c, - 0x24e, 0x24e, - 0x370, 0x370, - 0x372, 0x372, - 0x376, 0x376, - 0x37f, 0x37f, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x38f, - 0x391, 0x3a1, - 0x3a3, 0x3ab, - 0x3cf, 0x3cf, - 0x3d2, 0x3d4, - 0x3d8, 0x3d8, - 0x3da, 0x3da, - 0x3dc, 0x3dc, - 0x3de, 0x3de, - 0x3e0, 0x3e0, - 0x3e2, 0x3e2, - 0x3e4, 0x3e4, - 0x3e6, 0x3e6, - 0x3e8, 0x3e8, - 0x3ea, 0x3ea, - 0x3ec, 0x3ec, - 0x3ee, 0x3ee, - 0x3f4, 0x3f4, - 0x3f7, 0x3f7, - 0x3f9, 0x3fa, - 0x3fd, 0x42f, - 0x460, 0x460, - 0x462, 0x462, - 0x464, 0x464, - 0x466, 0x466, - 0x468, 0x468, - 0x46a, 0x46a, - 0x46c, 0x46c, - 0x46e, 0x46e, - 0x470, 0x470, - 0x472, 0x472, - 0x474, 0x474, - 0x476, 0x476, - 0x478, 0x478, - 0x47a, 0x47a, - 0x47c, 0x47c, - 0x47e, 0x47e, - 0x480, 0x480, - 0x48a, 0x48a, - 0x48c, 0x48c, - 0x48e, 0x48e, - 0x490, 0x490, - 0x492, 0x492, - 0x494, 0x494, - 0x496, 0x496, - 0x498, 0x498, - 0x49a, 0x49a, - 0x49c, 0x49c, - 0x49e, 0x49e, - 0x4a0, 0x4a0, - 0x4a2, 0x4a2, - 0x4a4, 0x4a4, - 0x4a6, 0x4a6, - 0x4a8, 0x4a8, - 0x4aa, 0x4aa, - 0x4ac, 0x4ac, - 0x4ae, 0x4ae, - 0x4b0, 0x4b0, - 0x4b2, 0x4b2, - 0x4b4, 0x4b4, - 0x4b6, 0x4b6, - 0x4b8, 0x4b8, - 0x4ba, 0x4ba, - 0x4bc, 0x4bc, - 0x4be, 0x4be, - 0x4c0, 0x4c1, - 0x4c3, 0x4c3, - 0x4c5, 0x4c5, - 0x4c7, 0x4c7, - 0x4c9, 0x4c9, - 0x4cb, 0x4cb, - 0x4cd, 0x4cd, - 0x4d0, 0x4d0, - 0x4d2, 0x4d2, - 0x4d4, 0x4d4, - 0x4d6, 0x4d6, - 0x4d8, 0x4d8, - 0x4da, 0x4da, - 0x4dc, 0x4dc, - 0x4de, 0x4de, - 0x4e0, 0x4e0, - 0x4e2, 0x4e2, - 0x4e4, 0x4e4, - 0x4e6, 0x4e6, - 0x4e8, 0x4e8, - 0x4ea, 0x4ea, - 0x4ec, 0x4ec, - 0x4ee, 0x4ee, - 0x4f0, 0x4f0, - 0x4f2, 0x4f2, - 0x4f4, 0x4f4, - 0x4f6, 0x4f6, - 0x4f8, 0x4f8, - 0x4fa, 0x4fa, - 0x4fc, 0x4fc, - 0x4fe, 0x4fe, - 0x500, 0x500, - 0x502, 0x502, - 0x504, 0x504, - 0x506, 0x506, - 0x508, 0x508, - 0x50a, 0x50a, - 0x50c, 0x50c, - 0x50e, 0x50e, - 0x510, 0x510, - 0x512, 0x512, - 0x514, 0x514, - 0x516, 0x516, - 0x518, 0x518, - 0x51a, 0x51a, - 0x51c, 0x51c, - 0x51e, 0x51e, - 0x520, 0x520, - 0x522, 0x522, - 0x524, 0x524, - 0x526, 0x526, - 0x528, 0x528, - 0x52a, 0x52a, - 0x52c, 0x52c, - 0x52e, 0x52e, - 0x531, 0x556, - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x1e00, 0x1e00, - 0x1e02, 0x1e02, - 0x1e04, 0x1e04, - 0x1e06, 0x1e06, - 0x1e08, 0x1e08, - 0x1e0a, 0x1e0a, - 0x1e0c, 0x1e0c, - 0x1e0e, 0x1e0e, - 0x1e10, 0x1e10, - 0x1e12, 0x1e12, - 0x1e14, 0x1e14, - 0x1e16, 0x1e16, - 0x1e18, 0x1e18, - 0x1e1a, 0x1e1a, - 0x1e1c, 0x1e1c, - 0x1e1e, 0x1e1e, - 0x1e20, 0x1e20, - 0x1e22, 0x1e22, - 0x1e24, 0x1e24, - 0x1e26, 0x1e26, - 0x1e28, 0x1e28, - 0x1e2a, 0x1e2a, - 0x1e2c, 0x1e2c, - 0x1e2e, 0x1e2e, - 0x1e30, 0x1e30, - 0x1e32, 0x1e32, - 0x1e34, 0x1e34, - 0x1e36, 0x1e36, - 0x1e38, 0x1e38, - 0x1e3a, 0x1e3a, - 0x1e3c, 0x1e3c, - 0x1e3e, 0x1e3e, - 0x1e40, 0x1e40, - 0x1e42, 0x1e42, - 0x1e44, 0x1e44, - 0x1e46, 0x1e46, - 0x1e48, 0x1e48, - 0x1e4a, 0x1e4a, - 0x1e4c, 0x1e4c, - 0x1e4e, 0x1e4e, - 0x1e50, 0x1e50, - 0x1e52, 0x1e52, - 0x1e54, 0x1e54, - 0x1e56, 0x1e56, - 0x1e58, 0x1e58, - 0x1e5a, 0x1e5a, - 0x1e5c, 0x1e5c, - 0x1e5e, 0x1e5e, - 0x1e60, 0x1e60, - 0x1e62, 0x1e62, - 0x1e64, 0x1e64, - 0x1e66, 0x1e66, - 0x1e68, 0x1e68, - 0x1e6a, 0x1e6a, - 0x1e6c, 0x1e6c, - 0x1e6e, 0x1e6e, - 0x1e70, 0x1e70, - 0x1e72, 0x1e72, - 0x1e74, 0x1e74, - 0x1e76, 0x1e76, - 0x1e78, 0x1e78, - 0x1e7a, 0x1e7a, - 0x1e7c, 0x1e7c, - 0x1e7e, 0x1e7e, - 0x1e80, 0x1e80, - 0x1e82, 0x1e82, - 0x1e84, 0x1e84, - 0x1e86, 0x1e86, - 0x1e88, 0x1e88, - 0x1e8a, 0x1e8a, - 0x1e8c, 0x1e8c, - 0x1e8e, 0x1e8e, - 0x1e90, 0x1e90, - 0x1e92, 0x1e92, - 0x1e94, 0x1e94, - 0x1e9e, 0x1e9e, - 0x1ea0, 0x1ea0, - 0x1ea2, 0x1ea2, - 0x1ea4, 0x1ea4, - 0x1ea6, 0x1ea6, - 0x1ea8, 0x1ea8, - 0x1eaa, 0x1eaa, - 0x1eac, 0x1eac, - 0x1eae, 0x1eae, - 0x1eb0, 0x1eb0, - 0x1eb2, 0x1eb2, - 0x1eb4, 0x1eb4, - 0x1eb6, 0x1eb6, - 0x1eb8, 0x1eb8, - 0x1eba, 0x1eba, - 0x1ebc, 0x1ebc, - 0x1ebe, 0x1ebe, - 0x1ec0, 0x1ec0, - 0x1ec2, 0x1ec2, - 0x1ec4, 0x1ec4, - 0x1ec6, 0x1ec6, - 0x1ec8, 0x1ec8, - 0x1eca, 0x1eca, - 0x1ecc, 0x1ecc, - 0x1ece, 0x1ece, - 0x1ed0, 0x1ed0, - 0x1ed2, 0x1ed2, - 0x1ed4, 0x1ed4, - 0x1ed6, 0x1ed6, - 0x1ed8, 0x1ed8, - 0x1eda, 0x1eda, - 0x1edc, 0x1edc, - 0x1ede, 0x1ede, - 0x1ee0, 0x1ee0, - 0x1ee2, 0x1ee2, - 0x1ee4, 0x1ee4, - 0x1ee6, 0x1ee6, - 0x1ee8, 0x1ee8, - 0x1eea, 0x1eea, - 0x1eec, 0x1eec, - 0x1eee, 0x1eee, - 0x1ef0, 0x1ef0, - 0x1ef2, 0x1ef2, - 0x1ef4, 0x1ef4, - 0x1ef6, 0x1ef6, - 0x1ef8, 0x1ef8, - 0x1efa, 0x1efa, - 0x1efc, 0x1efc, - 0x1efe, 0x1efe, - 0x1f08, 0x1f0f, - 0x1f18, 0x1f1d, - 0x1f28, 0x1f2f, - 0x1f38, 0x1f3f, - 0x1f48, 0x1f4d, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f5f, - 0x1f68, 0x1f6f, - 0x1fb8, 0x1fbb, - 0x1fc8, 0x1fcb, - 0x1fd8, 0x1fdb, - 0x1fe8, 0x1fec, - 0x1ff8, 0x1ffb, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210b, 0x210d, - 0x2110, 0x2112, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x2130, 0x2133, - 0x213e, 0x213f, - 0x2145, 0x2145, - 0x2183, 0x2183, - 0x2c00, 0x2c2e, - 0x2c60, 0x2c60, - 0x2c62, 0x2c64, - 0x2c67, 0x2c67, - 0x2c69, 0x2c69, - 0x2c6b, 0x2c6b, - 0x2c6d, 0x2c70, - 0x2c72, 0x2c72, - 0x2c75, 0x2c75, - 0x2c7e, 0x2c80, - 0x2c82, 0x2c82, - 0x2c84, 0x2c84, - 0x2c86, 0x2c86, - 0x2c88, 0x2c88, - 0x2c8a, 0x2c8a, - 0x2c8c, 0x2c8c, - 0x2c8e, 0x2c8e, - 0x2c90, 0x2c90, - 0x2c92, 0x2c92, - 0x2c94, 0x2c94, - 0x2c96, 0x2c96, - 0x2c98, 0x2c98, - 0x2c9a, 0x2c9a, - 0x2c9c, 0x2c9c, - 0x2c9e, 0x2c9e, - 0x2ca0, 0x2ca0, - 0x2ca2, 0x2ca2, - 0x2ca4, 0x2ca4, - 0x2ca6, 0x2ca6, - 0x2ca8, 0x2ca8, - 0x2caa, 0x2caa, - 0x2cac, 0x2cac, - 0x2cae, 0x2cae, - 0x2cb0, 0x2cb0, - 0x2cb2, 0x2cb2, - 0x2cb4, 0x2cb4, - 0x2cb6, 0x2cb6, - 0x2cb8, 0x2cb8, - 0x2cba, 0x2cba, - 0x2cbc, 0x2cbc, - 0x2cbe, 0x2cbe, - 0x2cc0, 0x2cc0, - 0x2cc2, 0x2cc2, - 0x2cc4, 0x2cc4, - 0x2cc6, 0x2cc6, - 0x2cc8, 0x2cc8, - 0x2cca, 0x2cca, - 0x2ccc, 0x2ccc, - 0x2cce, 0x2cce, - 0x2cd0, 0x2cd0, - 0x2cd2, 0x2cd2, - 0x2cd4, 0x2cd4, - 0x2cd6, 0x2cd6, - 0x2cd8, 0x2cd8, - 0x2cda, 0x2cda, - 0x2cdc, 0x2cdc, - 0x2cde, 0x2cde, - 0x2ce0, 0x2ce0, - 0x2ce2, 0x2ce2, - 0x2ceb, 0x2ceb, - 0x2ced, 0x2ced, - 0x2cf2, 0x2cf2, - 0xa640, 0xa640, - 0xa642, 0xa642, - 0xa644, 0xa644, - 0xa646, 0xa646, - 0xa648, 0xa648, - 0xa64a, 0xa64a, - 0xa64c, 0xa64c, - 0xa64e, 0xa64e, - 0xa650, 0xa650, - 0xa652, 0xa652, - 0xa654, 0xa654, - 0xa656, 0xa656, - 0xa658, 0xa658, - 0xa65a, 0xa65a, - 0xa65c, 0xa65c, - 0xa65e, 0xa65e, - 0xa660, 0xa660, - 0xa662, 0xa662, - 0xa664, 0xa664, - 0xa666, 0xa666, - 0xa668, 0xa668, - 0xa66a, 0xa66a, - 0xa66c, 0xa66c, - 0xa680, 0xa680, - 0xa682, 0xa682, - 0xa684, 0xa684, - 0xa686, 0xa686, - 0xa688, 0xa688, - 0xa68a, 0xa68a, - 0xa68c, 0xa68c, - 0xa68e, 0xa68e, - 0xa690, 0xa690, - 0xa692, 0xa692, - 0xa694, 0xa694, - 0xa696, 0xa696, - 0xa698, 0xa698, - 0xa69a, 0xa69a, - 0xa722, 0xa722, - 0xa724, 0xa724, - 0xa726, 0xa726, - 0xa728, 0xa728, - 0xa72a, 0xa72a, - 0xa72c, 0xa72c, - 0xa72e, 0xa72e, - 0xa732, 0xa732, - 0xa734, 0xa734, - 0xa736, 0xa736, - 0xa738, 0xa738, - 0xa73a, 0xa73a, - 0xa73c, 0xa73c, - 0xa73e, 0xa73e, - 0xa740, 0xa740, - 0xa742, 0xa742, - 0xa744, 0xa744, - 0xa746, 0xa746, - 0xa748, 0xa748, - 0xa74a, 0xa74a, - 0xa74c, 0xa74c, - 0xa74e, 0xa74e, - 0xa750, 0xa750, - 0xa752, 0xa752, - 0xa754, 0xa754, - 0xa756, 0xa756, - 0xa758, 0xa758, - 0xa75a, 0xa75a, - 0xa75c, 0xa75c, - 0xa75e, 0xa75e, - 0xa760, 0xa760, - 0xa762, 0xa762, - 0xa764, 0xa764, - 0xa766, 0xa766, - 0xa768, 0xa768, - 0xa76a, 0xa76a, - 0xa76c, 0xa76c, - 0xa76e, 0xa76e, - 0xa779, 0xa779, - 0xa77b, 0xa77b, - 0xa77d, 0xa77e, - 0xa780, 0xa780, - 0xa782, 0xa782, - 0xa784, 0xa784, - 0xa786, 0xa786, - 0xa78b, 0xa78b, - 0xa78d, 0xa78d, - 0xa790, 0xa790, - 0xa792, 0xa792, - 0xa796, 0xa796, - 0xa798, 0xa798, - 0xa79a, 0xa79a, - 0xa79c, 0xa79c, - 0xa79e, 0xa79e, - 0xa7a0, 0xa7a0, - 0xa7a2, 0xa7a2, - 0xa7a4, 0xa7a4, - 0xa7a6, 0xa7a6, - 0xa7a8, 0xa7a8, - 0xa7aa, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xff21, 0xff3a, - 0x10400, 0x10427, - 0x118a0, 0x118bf, - 0x1d400, 0x1d419, - 0x1d434, 0x1d44d, - 0x1d468, 0x1d481, - 0x1d49c, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b5, - 0x1d4d0, 0x1d4e9, - 0x1d504, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d538, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d56c, 0x1d585, - 0x1d5a0, 0x1d5b9, - 0x1d5d4, 0x1d5ed, - 0x1d608, 0x1d621, - 0x1d63c, 0x1d655, - 0x1d670, 0x1d689, - 0x1d6a8, 0x1d6c0, - 0x1d6e2, 0x1d6fa, - 0x1d71c, 0x1d734, - 0x1d756, 0x1d76e, - 0x1d790, 0x1d7a8, - 0x1d7ca, 0x1d7ca -}; -UCP_FN(Lu) - -static const unichar ucp_M_def[] = { - 0x300, 0x36f, - 0x483, 0x489, - 0x591, 0x5bd, - 0x5bf, 0x5bf, - 0x5c1, 0x5c2, - 0x5c4, 0x5c5, - 0x5c7, 0x5c7, - 0x610, 0x61a, - 0x64b, 0x65f, - 0x670, 0x670, - 0x6d6, 0x6dc, - 0x6df, 0x6e4, - 0x6e7, 0x6e8, - 0x6ea, 0x6ed, - 0x711, 0x711, - 0x730, 0x74a, - 0x7a6, 0x7b0, - 0x7eb, 0x7f3, - 0x816, 0x819, - 0x81b, 0x823, - 0x825, 0x827, - 0x829, 0x82d, - 0x859, 0x85b, - 0x8e4, 0x903, - 0x93a, 0x93c, - 0x93e, 0x94f, - 0x951, 0x957, - 0x962, 0x963, - 0x981, 0x983, - 0x9bc, 0x9bc, - 0x9be, 0x9c4, - 0x9c7, 0x9c8, - 0x9cb, 0x9cd, - 0x9d7, 0x9d7, - 0x9e2, 0x9e3, - 0xa01, 0xa03, - 0xa3c, 0xa3c, - 0xa3e, 0xa42, - 0xa47, 0xa48, - 0xa4b, 0xa4d, - 0xa51, 0xa51, - 0xa70, 0xa71, - 0xa75, 0xa75, - 0xa81, 0xa83, - 0xabc, 0xabc, - 0xabe, 0xac5, - 0xac7, 0xac9, - 0xacb, 0xacd, - 0xae2, 0xae3, - 0xb01, 0xb03, - 0xb3c, 0xb3c, - 0xb3e, 0xb44, - 0xb47, 0xb48, - 0xb4b, 0xb4d, - 0xb56, 0xb57, - 0xb62, 0xb63, - 0xb82, 0xb82, - 0xbbe, 0xbc2, - 0xbc6, 0xbc8, - 0xbca, 0xbcd, - 0xbd7, 0xbd7, - 0xc00, 0xc03, - 0xc3e, 0xc44, - 0xc46, 0xc48, - 0xc4a, 0xc4d, - 0xc55, 0xc56, - 0xc62, 0xc63, - 0xc81, 0xc83, - 0xcbc, 0xcbc, - 0xcbe, 0xcc4, - 0xcc6, 0xcc8, - 0xcca, 0xccd, - 0xcd5, 0xcd6, - 0xce2, 0xce3, - 0xd01, 0xd03, - 0xd3e, 0xd44, - 0xd46, 0xd48, - 0xd4a, 0xd4d, - 0xd57, 0xd57, - 0xd62, 0xd63, - 0xd82, 0xd83, - 0xdca, 0xdca, - 0xdcf, 0xdd4, - 0xdd6, 0xdd6, - 0xdd8, 0xddf, - 0xdf2, 0xdf3, - 0xe31, 0xe31, - 0xe34, 0xe3a, - 0xe47, 0xe4e, - 0xeb1, 0xeb1, - 0xeb4, 0xeb9, - 0xebb, 0xebc, - 0xec8, 0xecd, - 0xf18, 0xf19, - 0xf35, 0xf35, - 0xf37, 0xf37, - 0xf39, 0xf39, - 0xf3e, 0xf3f, - 0xf71, 0xf84, - 0xf86, 0xf87, - 0xf8d, 0xf97, - 0xf99, 0xfbc, - 0xfc6, 0xfc6, - 0x102b, 0x103e, - 0x1056, 0x1059, - 0x105e, 0x1060, - 0x1062, 0x1064, - 0x1067, 0x106d, - 0x1071, 0x1074, - 0x1082, 0x108d, - 0x108f, 0x108f, - 0x109a, 0x109d, - 0x135d, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b4, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a17, 0x1a1b, - 0x1a55, 0x1a5e, - 0x1a60, 0x1a7c, - 0x1a7f, 0x1a7f, - 0x1ab0, 0x1abe, - 0x1b00, 0x1b04, - 0x1b34, 0x1b44, - 0x1b6b, 0x1b73, - 0x1b80, 0x1b82, - 0x1ba1, 0x1bad, - 0x1be6, 0x1bf3, - 0x1c24, 0x1c37, - 0x1cd0, 0x1cd2, - 0x1cd4, 0x1ce8, - 0x1ced, 0x1ced, - 0x1cf2, 0x1cf4, - 0x1cf8, 0x1cf9, - 0x1dc0, 0x1df5, - 0x1dfc, 0x1dff, - 0x20d0, 0x20f0, - 0x2cef, 0x2cf1, - 0x2d7f, 0x2d7f, - 0x2de0, 0x2dff, - 0x302a, 0x302f, - 0x3099, 0x309a, - 0xa66f, 0xa672, - 0xa674, 0xa67d, - 0xa69f, 0xa69f, - 0xa6f0, 0xa6f1, - 0xa802, 0xa802, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa823, 0xa827, - 0xa880, 0xa881, - 0xa8b4, 0xa8c4, - 0xa8e0, 0xa8f1, - 0xa926, 0xa92d, - 0xa947, 0xa953, - 0xa980, 0xa983, - 0xa9b3, 0xa9c0, - 0xa9e5, 0xa9e5, - 0xaa29, 0xaa36, - 0xaa43, 0xaa43, - 0xaa4c, 0xaa4d, - 0xaa7b, 0xaa7d, - 0xaab0, 0xaab0, - 0xaab2, 0xaab4, - 0xaab7, 0xaab8, - 0xaabe, 0xaabf, - 0xaac1, 0xaac1, - 0xaaeb, 0xaaef, - 0xaaf5, 0xaaf6, - 0xabe3, 0xabea, - 0xabec, 0xabed, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe2d, - 0x101fd, 0x101fd, - 0x102e0, 0x102e0, - 0x10376, 0x1037a, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x10ae5, 0x10ae6, - 0x11000, 0x11002, - 0x11038, 0x11046, - 0x1107f, 0x11082, - 0x110b0, 0x110ba, - 0x11100, 0x11102, - 0x11127, 0x11134, - 0x11173, 0x11173, - 0x11180, 0x11182, - 0x111b3, 0x111c0, - 0x1122c, 0x11237, - 0x112df, 0x112ea, - 0x11301, 0x11303, - 0x1133c, 0x1133c, - 0x1133e, 0x11344, - 0x11347, 0x11348, - 0x1134b, 0x1134d, - 0x11357, 0x11357, - 0x11362, 0x11363, - 0x11366, 0x1136c, - 0x11370, 0x11374, - 0x114b0, 0x114c3, - 0x115af, 0x115b5, - 0x115b8, 0x115c0, - 0x11630, 0x11640, - 0x116ab, 0x116b7, - 0x16af0, 0x16af4, - 0x16b30, 0x16b36, - 0x16f51, 0x16f7e, - 0x16f8f, 0x16f92, - 0x1bc9d, 0x1bc9e, - 0x1d165, 0x1d169, - 0x1d16d, 0x1d172, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1e8d0, 0x1e8d6, - 0xe0100, 0xe01ef -}; -UCP_FN(M) - -static const unichar ucp_Mc_def[] = { - 0x903, 0x903, - 0x93b, 0x93b, - 0x93e, 0x940, - 0x949, 0x94c, - 0x94e, 0x94f, - 0x982, 0x983, - 0x9be, 0x9c0, - 0x9c7, 0x9c8, - 0x9cb, 0x9cc, - 0x9d7, 0x9d7, - 0xa03, 0xa03, - 0xa3e, 0xa40, - 0xa83, 0xa83, - 0xabe, 0xac0, - 0xac9, 0xac9, - 0xacb, 0xacc, - 0xb02, 0xb03, - 0xb3e, 0xb3e, - 0xb40, 0xb40, - 0xb47, 0xb48, - 0xb4b, 0xb4c, - 0xb57, 0xb57, - 0xbbe, 0xbbf, - 0xbc1, 0xbc2, - 0xbc6, 0xbc8, - 0xbca, 0xbcc, - 0xbd7, 0xbd7, - 0xc01, 0xc03, - 0xc41, 0xc44, - 0xc82, 0xc83, - 0xcbe, 0xcbe, - 0xcc0, 0xcc4, - 0xcc7, 0xcc8, - 0xcca, 0xccb, - 0xcd5, 0xcd6, - 0xd02, 0xd03, - 0xd3e, 0xd40, - 0xd46, 0xd48, - 0xd4a, 0xd4c, - 0xd57, 0xd57, - 0xd82, 0xd83, - 0xdcf, 0xdd1, - 0xdd8, 0xddf, - 0xdf2, 0xdf3, - 0xf3e, 0xf3f, - 0xf7f, 0xf7f, - 0x102b, 0x102c, - 0x1031, 0x1031, - 0x1038, 0x1038, - 0x103b, 0x103c, - 0x1056, 0x1057, - 0x1062, 0x1064, - 0x1067, 0x106d, - 0x1083, 0x1084, - 0x1087, 0x108c, - 0x108f, 0x108f, - 0x109a, 0x109c, - 0x17b6, 0x17b6, - 0x17be, 0x17c5, - 0x17c7, 0x17c8, - 0x1923, 0x1926, - 0x1929, 0x192b, - 0x1930, 0x1931, - 0x1933, 0x1938, - 0x19b0, 0x19c0, - 0x19c8, 0x19c9, - 0x1a19, 0x1a1a, - 0x1a55, 0x1a55, - 0x1a57, 0x1a57, - 0x1a61, 0x1a61, - 0x1a63, 0x1a64, - 0x1a6d, 0x1a72, - 0x1b04, 0x1b04, - 0x1b35, 0x1b35, - 0x1b3b, 0x1b3b, - 0x1b3d, 0x1b41, - 0x1b43, 0x1b44, - 0x1b82, 0x1b82, - 0x1ba1, 0x1ba1, - 0x1ba6, 0x1ba7, - 0x1baa, 0x1baa, - 0x1be7, 0x1be7, - 0x1bea, 0x1bec, - 0x1bee, 0x1bee, - 0x1bf2, 0x1bf3, - 0x1c24, 0x1c2b, - 0x1c34, 0x1c35, - 0x1ce1, 0x1ce1, - 0x1cf2, 0x1cf3, - 0x302e, 0x302f, - 0xa823, 0xa824, - 0xa827, 0xa827, - 0xa880, 0xa881, - 0xa8b4, 0xa8c3, - 0xa952, 0xa953, - 0xa983, 0xa983, - 0xa9b4, 0xa9b5, - 0xa9ba, 0xa9bb, - 0xa9bd, 0xa9c0, - 0xaa2f, 0xaa30, - 0xaa33, 0xaa34, - 0xaa4d, 0xaa4d, - 0xaa7b, 0xaa7b, - 0xaa7d, 0xaa7d, - 0xaaeb, 0xaaeb, - 0xaaee, 0xaaef, - 0xaaf5, 0xaaf5, - 0xabe3, 0xabe4, - 0xabe6, 0xabe7, - 0xabe9, 0xabea, - 0xabec, 0xabec, - 0x11000, 0x11000, - 0x11002, 0x11002, - 0x11082, 0x11082, - 0x110b0, 0x110b2, - 0x110b7, 0x110b8, - 0x1112c, 0x1112c, - 0x11182, 0x11182, - 0x111b3, 0x111b5, - 0x111bf, 0x111c0, - 0x1122c, 0x1122e, - 0x11232, 0x11233, - 0x11235, 0x11235, - 0x112e0, 0x112e2, - 0x11302, 0x11303, - 0x1133e, 0x1133f, - 0x11341, 0x11344, - 0x11347, 0x11348, - 0x1134b, 0x1134d, - 0x11357, 0x11357, - 0x11362, 0x11363, - 0x114b0, 0x114b2, - 0x114b9, 0x114b9, - 0x114bb, 0x114be, - 0x114c1, 0x114c1, - 0x115af, 0x115b1, - 0x115b8, 0x115bb, - 0x115be, 0x115be, - 0x11630, 0x11632, - 0x1163b, 0x1163c, - 0x1163e, 0x1163e, - 0x116ac, 0x116ac, - 0x116ae, 0x116af, - 0x116b6, 0x116b6, - 0x16f51, 0x16f7e, - 0x1d165, 0x1d166, - 0x1d16d, 0x1d172 -}; -UCP_FN(Mc) - -static const unichar ucp_Me_def[] = { - 0x488, 0x489, - 0x1abe, 0x1abe, - 0x20dd, 0x20e0, - 0x20e2, 0x20e4, - 0xa670, 0xa672 -}; -UCP_FN(Me) - -static const unichar ucp_Mn_def[] = { - 0x300, 0x36f, - 0x483, 0x487, - 0x591, 0x5bd, - 0x5bf, 0x5bf, - 0x5c1, 0x5c2, - 0x5c4, 0x5c5, - 0x5c7, 0x5c7, - 0x610, 0x61a, - 0x64b, 0x65f, - 0x670, 0x670, - 0x6d6, 0x6dc, - 0x6df, 0x6e4, - 0x6e7, 0x6e8, - 0x6ea, 0x6ed, - 0x711, 0x711, - 0x730, 0x74a, - 0x7a6, 0x7b0, - 0x7eb, 0x7f3, - 0x816, 0x819, - 0x81b, 0x823, - 0x825, 0x827, - 0x829, 0x82d, - 0x859, 0x85b, - 0x8e4, 0x902, - 0x93a, 0x93a, - 0x93c, 0x93c, - 0x941, 0x948, - 0x94d, 0x94d, - 0x951, 0x957, - 0x962, 0x963, - 0x981, 0x981, - 0x9bc, 0x9bc, - 0x9c1, 0x9c4, - 0x9cd, 0x9cd, - 0x9e2, 0x9e3, - 0xa01, 0xa02, - 0xa3c, 0xa3c, - 0xa41, 0xa42, - 0xa47, 0xa48, - 0xa4b, 0xa4d, - 0xa51, 0xa51, - 0xa70, 0xa71, - 0xa75, 0xa75, - 0xa81, 0xa82, - 0xabc, 0xabc, - 0xac1, 0xac5, - 0xac7, 0xac8, - 0xacd, 0xacd, - 0xae2, 0xae3, - 0xb01, 0xb01, - 0xb3c, 0xb3c, - 0xb3f, 0xb3f, - 0xb41, 0xb44, - 0xb4d, 0xb4d, - 0xb56, 0xb56, - 0xb62, 0xb63, - 0xb82, 0xb82, - 0xbc0, 0xbc0, - 0xbcd, 0xbcd, - 0xc00, 0xc00, - 0xc3e, 0xc40, - 0xc46, 0xc48, - 0xc4a, 0xc4d, - 0xc55, 0xc56, - 0xc62, 0xc63, - 0xc81, 0xc81, - 0xcbc, 0xcbc, - 0xcbf, 0xcbf, - 0xcc6, 0xcc6, - 0xccc, 0xccd, - 0xce2, 0xce3, - 0xd01, 0xd01, - 0xd41, 0xd44, - 0xd4d, 0xd4d, - 0xd62, 0xd63, - 0xdca, 0xdca, - 0xdd2, 0xdd4, - 0xdd6, 0xdd6, - 0xe31, 0xe31, - 0xe34, 0xe3a, - 0xe47, 0xe4e, - 0xeb1, 0xeb1, - 0xeb4, 0xeb9, - 0xebb, 0xebc, - 0xec8, 0xecd, - 0xf18, 0xf19, - 0xf35, 0xf35, - 0xf37, 0xf37, - 0xf39, 0xf39, - 0xf71, 0xf7e, - 0xf80, 0xf84, - 0xf86, 0xf87, - 0xf8d, 0xf97, - 0xf99, 0xfbc, - 0xfc6, 0xfc6, - 0x102d, 0x1030, - 0x1032, 0x1037, - 0x1039, 0x103a, - 0x103d, 0x103e, - 0x1058, 0x1059, - 0x105e, 0x1060, - 0x1071, 0x1074, - 0x1082, 0x1082, - 0x1085, 0x1086, - 0x108d, 0x108d, - 0x109d, 0x109d, - 0x135d, 0x135f, - 0x1712, 0x1714, - 0x1732, 0x1734, - 0x1752, 0x1753, - 0x1772, 0x1773, - 0x17b4, 0x17b5, - 0x17b7, 0x17bd, - 0x17c6, 0x17c6, - 0x17c9, 0x17d3, - 0x17dd, 0x17dd, - 0x180b, 0x180d, - 0x18a9, 0x18a9, - 0x1920, 0x1922, - 0x1927, 0x1928, - 0x1932, 0x1932, - 0x1939, 0x193b, - 0x1a17, 0x1a18, - 0x1a1b, 0x1a1b, - 0x1a56, 0x1a56, - 0x1a58, 0x1a5e, - 0x1a60, 0x1a60, - 0x1a62, 0x1a62, - 0x1a65, 0x1a6c, - 0x1a73, 0x1a7c, - 0x1a7f, 0x1a7f, - 0x1ab0, 0x1abd, - 0x1b00, 0x1b03, - 0x1b34, 0x1b34, - 0x1b36, 0x1b3a, - 0x1b3c, 0x1b3c, - 0x1b42, 0x1b42, - 0x1b6b, 0x1b73, - 0x1b80, 0x1b81, - 0x1ba2, 0x1ba5, - 0x1ba8, 0x1ba9, - 0x1bab, 0x1bad, - 0x1be6, 0x1be6, - 0x1be8, 0x1be9, - 0x1bed, 0x1bed, - 0x1bef, 0x1bf1, - 0x1c2c, 0x1c33, - 0x1c36, 0x1c37, - 0x1cd0, 0x1cd2, - 0x1cd4, 0x1ce0, - 0x1ce2, 0x1ce8, - 0x1ced, 0x1ced, - 0x1cf4, 0x1cf4, - 0x1cf8, 0x1cf9, - 0x1dc0, 0x1df5, - 0x1dfc, 0x1dff, - 0x20d0, 0x20dc, - 0x20e1, 0x20e1, - 0x20e5, 0x20f0, - 0x2cef, 0x2cf1, - 0x2d7f, 0x2d7f, - 0x2de0, 0x2dff, - 0x302a, 0x302d, - 0x3099, 0x309a, - 0xa66f, 0xa66f, - 0xa674, 0xa67d, - 0xa69f, 0xa69f, - 0xa6f0, 0xa6f1, - 0xa802, 0xa802, - 0xa806, 0xa806, - 0xa80b, 0xa80b, - 0xa825, 0xa826, - 0xa8c4, 0xa8c4, - 0xa8e0, 0xa8f1, - 0xa926, 0xa92d, - 0xa947, 0xa951, - 0xa980, 0xa982, - 0xa9b3, 0xa9b3, - 0xa9b6, 0xa9b9, - 0xa9bc, 0xa9bc, - 0xa9e5, 0xa9e5, - 0xaa29, 0xaa2e, - 0xaa31, 0xaa32, - 0xaa35, 0xaa36, - 0xaa43, 0xaa43, - 0xaa4c, 0xaa4c, - 0xaa7c, 0xaa7c, - 0xaab0, 0xaab0, - 0xaab2, 0xaab4, - 0xaab7, 0xaab8, - 0xaabe, 0xaabf, - 0xaac1, 0xaac1, - 0xaaec, 0xaaed, - 0xaaf6, 0xaaf6, - 0xabe5, 0xabe5, - 0xabe8, 0xabe8, - 0xabed, 0xabed, - 0xfb1e, 0xfb1e, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe2d, - 0x101fd, 0x101fd, - 0x102e0, 0x102e0, - 0x10376, 0x1037a, - 0x10a01, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a0f, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a3f, - 0x10ae5, 0x10ae6, - 0x11001, 0x11001, - 0x11038, 0x11046, - 0x1107f, 0x11081, - 0x110b3, 0x110b6, - 0x110b9, 0x110ba, - 0x11100, 0x11102, - 0x11127, 0x1112b, - 0x1112d, 0x11134, - 0x11173, 0x11173, - 0x11180, 0x11181, - 0x111b6, 0x111be, - 0x1122f, 0x11231, - 0x11234, 0x11234, - 0x11236, 0x11237, - 0x112df, 0x112df, - 0x112e3, 0x112ea, - 0x11301, 0x11301, - 0x1133c, 0x1133c, - 0x11340, 0x11340, - 0x11366, 0x1136c, - 0x11370, 0x11374, - 0x114b3, 0x114b8, - 0x114ba, 0x114ba, - 0x114bf, 0x114c0, - 0x114c2, 0x114c3, - 0x115b2, 0x115b5, - 0x115bc, 0x115bd, - 0x115bf, 0x115c0, - 0x11633, 0x1163a, - 0x1163d, 0x1163d, - 0x1163f, 0x11640, - 0x116ab, 0x116ab, - 0x116ad, 0x116ad, - 0x116b0, 0x116b5, - 0x116b7, 0x116b7, - 0x16af0, 0x16af4, - 0x16b30, 0x16b36, - 0x16f8f, 0x16f92, - 0x1bc9d, 0x1bc9e, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0x1d242, 0x1d244, - 0x1e8d0, 0x1e8d6, - 0xe0100, 0xe01ef -}; -UCP_FN(Mn) - -static const unichar ucp_N_def[] = { - 0x30, 0x39, - 0xb2, 0xb3, - 0xb9, 0xb9, - 0xbc, 0xbe, - 0x660, 0x669, - 0x6f0, 0x6f9, - 0x7c0, 0x7c9, - 0x966, 0x96f, - 0x9e6, 0x9ef, - 0x9f4, 0x9f9, - 0xa66, 0xa6f, - 0xae6, 0xaef, - 0xb66, 0xb6f, - 0xb72, 0xb77, - 0xbe6, 0xbf2, - 0xc66, 0xc6f, - 0xc78, 0xc7e, - 0xce6, 0xcef, - 0xd66, 0xd75, - 0xde6, 0xdef, - 0xe50, 0xe59, - 0xed0, 0xed9, - 0xf20, 0xf33, - 0x1040, 0x1049, - 0x1090, 0x1099, - 0x1369, 0x137c, - 0x16ee, 0x16f0, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19da, - 0x1a80, 0x1a89, - 0x1a90, 0x1a99, - 0x1b50, 0x1b59, - 0x1bb0, 0x1bb9, - 0x1c40, 0x1c49, - 0x1c50, 0x1c59, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2150, 0x2182, - 0x2185, 0x2189, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3248, 0x324f, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0xa620, 0xa629, - 0xa6e6, 0xa6ef, - 0xa830, 0xa835, - 0xa8d0, 0xa8d9, - 0xa900, 0xa909, - 0xa9d0, 0xa9d9, - 0xa9f0, 0xa9f9, - 0xaa50, 0xaa59, - 0xabf0, 0xabf9, - 0xff10, 0xff19, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018b, - 0x102e1, 0x102fb, - 0x10320, 0x10323, - 0x10341, 0x10341, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5, - 0x104a0, 0x104a9, - 0x10858, 0x1085f, - 0x10879, 0x1087f, - 0x108a7, 0x108af, - 0x10916, 0x1091b, - 0x10a40, 0x10a47, - 0x10a7d, 0x10a7e, - 0x10a9d, 0x10a9f, - 0x10aeb, 0x10aef, - 0x10b58, 0x10b5f, - 0x10b78, 0x10b7f, - 0x10ba9, 0x10baf, - 0x10e60, 0x10e7e, - 0x11052, 0x1106f, - 0x110f0, 0x110f9, - 0x11136, 0x1113f, - 0x111d0, 0x111d9, - 0x111e1, 0x111f4, - 0x112f0, 0x112f9, - 0x114d0, 0x114d9, - 0x11650, 0x11659, - 0x116c0, 0x116c9, - 0x118e0, 0x118f2, - 0x12400, 0x1246e, - 0x16a60, 0x16a69, - 0x16b50, 0x16b59, - 0x16b5b, 0x16b61, - 0x1d360, 0x1d371, - 0x1d7ce, 0x1d7ff, - 0x1e8c7, 0x1e8cf, - 0x1f100, 0x1f10c -}; -UCP_FN(N) - -static const unichar ucp_Nd_def[] = { - 0x30, 0x39, - 0x660, 0x669, - 0x6f0, 0x6f9, - 0x7c0, 0x7c9, - 0x966, 0x96f, - 0x9e6, 0x9ef, - 0xa66, 0xa6f, - 0xae6, 0xaef, - 0xb66, 0xb6f, - 0xbe6, 0xbef, - 0xc66, 0xc6f, - 0xce6, 0xcef, - 0xd66, 0xd6f, - 0xde6, 0xdef, - 0xe50, 0xe59, - 0xed0, 0xed9, - 0xf20, 0xf29, - 0x1040, 0x1049, - 0x1090, 0x1099, - 0x17e0, 0x17e9, - 0x1810, 0x1819, - 0x1946, 0x194f, - 0x19d0, 0x19d9, - 0x1a80, 0x1a89, - 0x1a90, 0x1a99, - 0x1b50, 0x1b59, - 0x1bb0, 0x1bb9, - 0x1c40, 0x1c49, - 0x1c50, 0x1c59, - 0xa620, 0xa629, - 0xa8d0, 0xa8d9, - 0xa900, 0xa909, - 0xa9d0, 0xa9d9, - 0xa9f0, 0xa9f9, - 0xaa50, 0xaa59, - 0xabf0, 0xabf9, - 0xff10, 0xff19, - 0x104a0, 0x104a9, - 0x11066, 0x1106f, - 0x110f0, 0x110f9, - 0x11136, 0x1113f, - 0x111d0, 0x111d9, - 0x112f0, 0x112f9, - 0x114d0, 0x114d9, - 0x11650, 0x11659, - 0x116c0, 0x116c9, - 0x118e0, 0x118e9, - 0x16a60, 0x16a69, - 0x16b50, 0x16b59, - 0x1d7ce, 0x1d7ff -}; -UCP_FN(Nd) - -static const unichar ucp_Nl_def[] = { - 0x16ee, 0x16f0, - 0x2160, 0x2182, - 0x2185, 0x2188, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303a, - 0xa6e6, 0xa6ef, - 0x10140, 0x10174, - 0x10341, 0x10341, - 0x1034a, 0x1034a, - 0x103d1, 0x103d5, - 0x12400, 0x1246e -}; -UCP_FN(Nl) - -static const unichar ucp_No_def[] = { - 0xb2, 0xb3, - 0xb9, 0xb9, - 0xbc, 0xbe, - 0x9f4, 0x9f9, - 0xb72, 0xb77, - 0xbf0, 0xbf2, - 0xc78, 0xc7e, - 0xd70, 0xd75, - 0xf2a, 0xf33, - 0x1369, 0x137c, - 0x17f0, 0x17f9, - 0x19da, 0x19da, - 0x2070, 0x2070, - 0x2074, 0x2079, - 0x2080, 0x2089, - 0x2150, 0x215f, - 0x2189, 0x2189, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2cfd, 0x2cfd, - 0x3192, 0x3195, - 0x3220, 0x3229, - 0x3248, 0x324f, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0xa830, 0xa835, - 0x10107, 0x10133, - 0x10175, 0x10178, - 0x1018a, 0x1018b, - 0x102e1, 0x102fb, - 0x10320, 0x10323, - 0x10858, 0x1085f, - 0x10879, 0x1087f, - 0x108a7, 0x108af, - 0x10916, 0x1091b, - 0x10a40, 0x10a47, - 0x10a7d, 0x10a7e, - 0x10a9d, 0x10a9f, - 0x10aeb, 0x10aef, - 0x10b58, 0x10b5f, - 0x10b78, 0x10b7f, - 0x10ba9, 0x10baf, - 0x10e60, 0x10e7e, - 0x11052, 0x11065, - 0x111e1, 0x111f4, - 0x118ea, 0x118f2, - 0x16b5b, 0x16b61, - 0x1d360, 0x1d371, - 0x1e8c7, 0x1e8cf, - 0x1f100, 0x1f10c -}; -UCP_FN(No) - -static const unichar ucp_P_def[] = { - 0x21, 0x23, - 0x25, 0x2a, - 0x2c, 0x2f, - 0x3a, 0x3b, - 0x3f, 0x40, - 0x5b, 0x5d, - 0x5f, 0x5f, - 0x7b, 0x7b, - 0x7d, 0x7d, - 0xa1, 0xa1, - 0xa7, 0xa7, - 0xab, 0xab, - 0xb6, 0xb7, - 0xbb, 0xbb, - 0xbf, 0xbf, - 0x37e, 0x37e, - 0x387, 0x387, - 0x55a, 0x55f, - 0x589, 0x58a, - 0x5be, 0x5be, - 0x5c0, 0x5c0, - 0x5c3, 0x5c3, - 0x5c6, 0x5c6, - 0x5f3, 0x5f4, - 0x609, 0x60a, - 0x60c, 0x60d, - 0x61b, 0x61b, - 0x61e, 0x61f, - 0x66a, 0x66d, - 0x6d4, 0x6d4, - 0x700, 0x70d, - 0x7f7, 0x7f9, - 0x830, 0x83e, - 0x85e, 0x85e, - 0x964, 0x965, - 0x970, 0x970, - 0xaf0, 0xaf0, - 0xdf4, 0xdf4, - 0xe4f, 0xe4f, - 0xe5a, 0xe5b, - 0xf04, 0xf12, - 0xf14, 0xf14, - 0xf3a, 0xf3d, - 0xf85, 0xf85, - 0xfd0, 0xfd4, - 0xfd9, 0xfda, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1360, 0x1368, - 0x1400, 0x1400, - 0x166d, 0x166e, - 0x169b, 0x169c, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x180a, - 0x1944, 0x1945, - 0x1a1e, 0x1a1f, - 0x1aa0, 0x1aa6, - 0x1aa8, 0x1aad, - 0x1b5a, 0x1b60, - 0x1bfc, 0x1bff, - 0x1c3b, 0x1c3f, - 0x1c7e, 0x1c7f, - 0x1cc0, 0x1cc7, - 0x1cd3, 0x1cd3, - 0x2010, 0x2027, - 0x2030, 0x2043, - 0x2045, 0x2051, - 0x2053, 0x205e, - 0x207d, 0x207e, - 0x208d, 0x208e, - 0x2308, 0x230b, - 0x2329, 0x232a, - 0x2768, 0x2775, - 0x27c5, 0x27c6, - 0x27e6, 0x27ef, - 0x2983, 0x2998, - 0x29d8, 0x29db, - 0x29fc, 0x29fd, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2d70, 0x2d70, - 0x2e00, 0x2e2e, - 0x2e30, 0x2e42, - 0x3001, 0x3003, - 0x3008, 0x3011, - 0x3014, 0x301f, - 0x3030, 0x3030, - 0x303d, 0x303d, - 0x30a0, 0x30a0, - 0x30fb, 0x30fb, - 0xa4fe, 0xa4ff, - 0xa60d, 0xa60f, - 0xa673, 0xa673, - 0xa67e, 0xa67e, - 0xa6f2, 0xa6f7, - 0xa874, 0xa877, - 0xa8ce, 0xa8cf, - 0xa8f8, 0xa8fa, - 0xa92e, 0xa92f, - 0xa95f, 0xa95f, - 0xa9c1, 0xa9cd, - 0xa9de, 0xa9df, - 0xaa5c, 0xaa5f, - 0xaade, 0xaadf, - 0xaaf0, 0xaaf1, - 0xabeb, 0xabeb, - 0xfd3e, 0xfd3f, - 0xfe10, 0xfe19, - 0xfe30, 0xfe52, - 0xfe54, 0xfe61, - 0xfe63, 0xfe63, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff0a, - 0xff0c, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3b, 0xff3d, - 0xff3f, 0xff3f, - 0xff5b, 0xff5b, - 0xff5d, 0xff5d, - 0xff5f, 0xff65, - 0x10100, 0x10102, - 0x1039f, 0x1039f, - 0x103d0, 0x103d0, - 0x1056f, 0x1056f, - 0x10857, 0x10857, - 0x1091f, 0x1091f, - 0x1093f, 0x1093f, - 0x10a50, 0x10a58, - 0x10a7f, 0x10a7f, - 0x10af0, 0x10af6, - 0x10b39, 0x10b3f, - 0x10b99, 0x10b9c, - 0x11047, 0x1104d, - 0x110bb, 0x110bc, - 0x110be, 0x110c1, - 0x11140, 0x11143, - 0x11174, 0x11175, - 0x111c5, 0x111c8, - 0x111cd, 0x111cd, - 0x11238, 0x1123d, - 0x114c6, 0x114c6, - 0x115c1, 0x115c9, - 0x11641, 0x11643, - 0x12470, 0x12474, - 0x16a6e, 0x16a6f, - 0x16af5, 0x16af5, - 0x16b37, 0x16b3b, - 0x16b44, 0x16b44, - 0x1bc9f, 0x1bc9f -}; -UCP_FN(P) - -static const unichar ucp_Pc_def[] = { - 0x5f, 0x5f, - 0x203f, 0x2040, - 0x2054, 0x2054, - 0xfe33, 0xfe34, - 0xfe4d, 0xfe4f, - 0xff3f, 0xff3f -}; -UCP_FN(Pc) - -static const unichar ucp_Pd_def[] = { - 0x2d, 0x2d, - 0x58a, 0x58a, - 0x5be, 0x5be, - 0x1400, 0x1400, - 0x1806, 0x1806, - 0x2010, 0x2015, - 0x2e17, 0x2e17, - 0x2e1a, 0x2e1a, - 0x2e3a, 0x2e3b, - 0x2e40, 0x2e40, - 0x301c, 0x301c, - 0x3030, 0x3030, - 0x30a0, 0x30a0, - 0xfe31, 0xfe32, - 0xfe58, 0xfe58, - 0xfe63, 0xfe63, - 0xff0d, 0xff0d -}; -UCP_FN(Pd) - -static const unichar ucp_Pe_def[] = { - 0x29, 0x29, - 0x5d, 0x5d, - 0x7d, 0x7d, - 0xf3b, 0xf3b, - 0xf3d, 0xf3d, - 0x169c, 0x169c, - 0x2046, 0x2046, - 0x207e, 0x207e, - 0x208e, 0x208e, - 0x2309, 0x2309, - 0x230b, 0x230b, - 0x232a, 0x232a, - 0x2769, 0x2769, - 0x276b, 0x276b, - 0x276d, 0x276d, - 0x276f, 0x276f, - 0x2771, 0x2771, - 0x2773, 0x2773, - 0x2775, 0x2775, - 0x27c6, 0x27c6, - 0x27e7, 0x27e7, - 0x27e9, 0x27e9, - 0x27eb, 0x27eb, - 0x27ed, 0x27ed, - 0x27ef, 0x27ef, - 0x2984, 0x2984, - 0x2986, 0x2986, - 0x2988, 0x2988, - 0x298a, 0x298a, - 0x298c, 0x298c, - 0x298e, 0x298e, - 0x2990, 0x2990, - 0x2992, 0x2992, - 0x2994, 0x2994, - 0x2996, 0x2996, - 0x2998, 0x2998, - 0x29d9, 0x29d9, - 0x29db, 0x29db, - 0x29fd, 0x29fd, - 0x2e23, 0x2e23, - 0x2e25, 0x2e25, - 0x2e27, 0x2e27, - 0x2e29, 0x2e29, - 0x3009, 0x3009, - 0x300b, 0x300b, - 0x300d, 0x300d, - 0x300f, 0x300f, - 0x3011, 0x3011, - 0x3015, 0x3015, - 0x3017, 0x3017, - 0x3019, 0x3019, - 0x301b, 0x301b, - 0x301e, 0x301f, - 0xfd3e, 0xfd3e, - 0xfe18, 0xfe18, - 0xfe36, 0xfe36, - 0xfe38, 0xfe38, - 0xfe3a, 0xfe3a, - 0xfe3c, 0xfe3c, - 0xfe3e, 0xfe3e, - 0xfe40, 0xfe40, - 0xfe42, 0xfe42, - 0xfe44, 0xfe44, - 0xfe48, 0xfe48, - 0xfe5a, 0xfe5a, - 0xfe5c, 0xfe5c, - 0xfe5e, 0xfe5e, - 0xff09, 0xff09, - 0xff3d, 0xff3d, - 0xff5d, 0xff5d, - 0xff60, 0xff60, - 0xff63, 0xff63 -}; -UCP_FN(Pe) - -static const unichar ucp_Pf_def[] = { - 0xbb, 0xbb, - 0x2019, 0x2019, - 0x201d, 0x201d, - 0x203a, 0x203a, - 0x2e03, 0x2e03, - 0x2e05, 0x2e05, - 0x2e0a, 0x2e0a, - 0x2e0d, 0x2e0d, - 0x2e1d, 0x2e1d, - 0x2e21, 0x2e21 -}; -UCP_FN(Pf) - -static const unichar ucp_Pi_def[] = { - 0xab, 0xab, - 0x2018, 0x2018, - 0x201b, 0x201c, - 0x201f, 0x201f, - 0x2039, 0x2039, - 0x2e02, 0x2e02, - 0x2e04, 0x2e04, - 0x2e09, 0x2e09, - 0x2e0c, 0x2e0c, - 0x2e1c, 0x2e1c, - 0x2e20, 0x2e20 -}; -UCP_FN(Pi) - -static const unichar ucp_Po_def[] = { - 0x21, 0x23, - 0x25, 0x27, - 0x2a, 0x2a, - 0x2c, 0x2c, - 0x2e, 0x2f, - 0x3a, 0x3b, - 0x3f, 0x40, - 0x5c, 0x5c, - 0xa1, 0xa1, - 0xa7, 0xa7, - 0xb6, 0xb7, - 0xbf, 0xbf, - 0x37e, 0x37e, - 0x387, 0x387, - 0x55a, 0x55f, - 0x589, 0x589, - 0x5c0, 0x5c0, - 0x5c3, 0x5c3, - 0x5c6, 0x5c6, - 0x5f3, 0x5f4, - 0x609, 0x60a, - 0x60c, 0x60d, - 0x61b, 0x61b, - 0x61e, 0x61f, - 0x66a, 0x66d, - 0x6d4, 0x6d4, - 0x700, 0x70d, - 0x7f7, 0x7f9, - 0x830, 0x83e, - 0x85e, 0x85e, - 0x964, 0x965, - 0x970, 0x970, - 0xaf0, 0xaf0, - 0xdf4, 0xdf4, - 0xe4f, 0xe4f, - 0xe5a, 0xe5b, - 0xf04, 0xf12, - 0xf14, 0xf14, - 0xf85, 0xf85, - 0xfd0, 0xfd4, - 0xfd9, 0xfda, - 0x104a, 0x104f, - 0x10fb, 0x10fb, - 0x1360, 0x1368, - 0x166d, 0x166e, - 0x16eb, 0x16ed, - 0x1735, 0x1736, - 0x17d4, 0x17d6, - 0x17d8, 0x17da, - 0x1800, 0x1805, - 0x1807, 0x180a, - 0x1944, 0x1945, - 0x1a1e, 0x1a1f, - 0x1aa0, 0x1aa6, - 0x1aa8, 0x1aad, - 0x1b5a, 0x1b60, - 0x1bfc, 0x1bff, - 0x1c3b, 0x1c3f, - 0x1c7e, 0x1c7f, - 0x1cc0, 0x1cc7, - 0x1cd3, 0x1cd3, - 0x2016, 0x2017, - 0x2020, 0x2027, - 0x2030, 0x2038, - 0x203b, 0x203e, - 0x2041, 0x2043, - 0x2047, 0x2051, - 0x2053, 0x2053, - 0x2055, 0x205e, - 0x2cf9, 0x2cfc, - 0x2cfe, 0x2cff, - 0x2d70, 0x2d70, - 0x2e00, 0x2e01, - 0x2e06, 0x2e08, - 0x2e0b, 0x2e0b, - 0x2e0e, 0x2e16, - 0x2e18, 0x2e19, - 0x2e1b, 0x2e1b, - 0x2e1e, 0x2e1f, - 0x2e2a, 0x2e2e, - 0x2e30, 0x2e39, - 0x2e3c, 0x2e3f, - 0x2e41, 0x2e41, - 0x3001, 0x3003, - 0x303d, 0x303d, - 0x30fb, 0x30fb, - 0xa4fe, 0xa4ff, - 0xa60d, 0xa60f, - 0xa673, 0xa673, - 0xa67e, 0xa67e, - 0xa6f2, 0xa6f7, - 0xa874, 0xa877, - 0xa8ce, 0xa8cf, - 0xa8f8, 0xa8fa, - 0xa92e, 0xa92f, - 0xa95f, 0xa95f, - 0xa9c1, 0xa9cd, - 0xa9de, 0xa9df, - 0xaa5c, 0xaa5f, - 0xaade, 0xaadf, - 0xaaf0, 0xaaf1, - 0xabeb, 0xabeb, - 0xfe10, 0xfe16, - 0xfe19, 0xfe19, - 0xfe30, 0xfe30, - 0xfe45, 0xfe46, - 0xfe49, 0xfe4c, - 0xfe50, 0xfe52, - 0xfe54, 0xfe57, - 0xfe5f, 0xfe61, - 0xfe68, 0xfe68, - 0xfe6a, 0xfe6b, - 0xff01, 0xff03, - 0xff05, 0xff07, - 0xff0a, 0xff0a, - 0xff0c, 0xff0c, - 0xff0e, 0xff0f, - 0xff1a, 0xff1b, - 0xff1f, 0xff20, - 0xff3c, 0xff3c, - 0xff61, 0xff61, - 0xff64, 0xff65, - 0x10100, 0x10102, - 0x1039f, 0x1039f, - 0x103d0, 0x103d0, - 0x1056f, 0x1056f, - 0x10857, 0x10857, - 0x1091f, 0x1091f, - 0x1093f, 0x1093f, - 0x10a50, 0x10a58, - 0x10a7f, 0x10a7f, - 0x10af0, 0x10af6, - 0x10b39, 0x10b3f, - 0x10b99, 0x10b9c, - 0x11047, 0x1104d, - 0x110bb, 0x110bc, - 0x110be, 0x110c1, - 0x11140, 0x11143, - 0x11174, 0x11175, - 0x111c5, 0x111c8, - 0x111cd, 0x111cd, - 0x11238, 0x1123d, - 0x114c6, 0x114c6, - 0x115c1, 0x115c9, - 0x11641, 0x11643, - 0x12470, 0x12474, - 0x16a6e, 0x16a6f, - 0x16af5, 0x16af5, - 0x16b37, 0x16b3b, - 0x16b44, 0x16b44, - 0x1bc9f, 0x1bc9f -}; -UCP_FN(Po) - -static const unichar ucp_Ps_def[] = { - 0x28, 0x28, - 0x5b, 0x5b, - 0x7b, 0x7b, - 0xf3a, 0xf3a, - 0xf3c, 0xf3c, - 0x169b, 0x169b, - 0x201a, 0x201a, - 0x201e, 0x201e, - 0x2045, 0x2045, - 0x207d, 0x207d, - 0x208d, 0x208d, - 0x2308, 0x2308, - 0x230a, 0x230a, - 0x2329, 0x2329, - 0x2768, 0x2768, - 0x276a, 0x276a, - 0x276c, 0x276c, - 0x276e, 0x276e, - 0x2770, 0x2770, - 0x2772, 0x2772, - 0x2774, 0x2774, - 0x27c5, 0x27c5, - 0x27e6, 0x27e6, - 0x27e8, 0x27e8, - 0x27ea, 0x27ea, - 0x27ec, 0x27ec, - 0x27ee, 0x27ee, - 0x2983, 0x2983, - 0x2985, 0x2985, - 0x2987, 0x2987, - 0x2989, 0x2989, - 0x298b, 0x298b, - 0x298d, 0x298d, - 0x298f, 0x298f, - 0x2991, 0x2991, - 0x2993, 0x2993, - 0x2995, 0x2995, - 0x2997, 0x2997, - 0x29d8, 0x29d8, - 0x29da, 0x29da, - 0x29fc, 0x29fc, - 0x2e22, 0x2e22, - 0x2e24, 0x2e24, - 0x2e26, 0x2e26, - 0x2e28, 0x2e28, - 0x2e42, 0x2e42, - 0x3008, 0x3008, - 0x300a, 0x300a, - 0x300c, 0x300c, - 0x300e, 0x300e, - 0x3010, 0x3010, - 0x3014, 0x3014, - 0x3016, 0x3016, - 0x3018, 0x3018, - 0x301a, 0x301a, - 0x301d, 0x301d, - 0xfd3f, 0xfd3f, - 0xfe17, 0xfe17, - 0xfe35, 0xfe35, - 0xfe37, 0xfe37, - 0xfe39, 0xfe39, - 0xfe3b, 0xfe3b, - 0xfe3d, 0xfe3d, - 0xfe3f, 0xfe3f, - 0xfe41, 0xfe41, - 0xfe43, 0xfe43, - 0xfe47, 0xfe47, - 0xfe59, 0xfe59, - 0xfe5b, 0xfe5b, - 0xfe5d, 0xfe5d, - 0xff08, 0xff08, - 0xff3b, 0xff3b, - 0xff5b, 0xff5b, - 0xff5f, 0xff5f, - 0xff62, 0xff62 -}; -UCP_FN(Ps) - -static const unichar ucp_S_def[] = { - 0x24, 0x24, - 0x2b, 0x2b, - 0x3c, 0x3e, - 0x5e, 0x5e, - 0x60, 0x60, - 0x7c, 0x7c, - 0x7e, 0x7e, - 0xa2, 0xa6, - 0xa8, 0xa9, - 0xac, 0xac, - 0xae, 0xb1, - 0xb4, 0xb4, - 0xb8, 0xb8, - 0xd7, 0xd7, - 0xf7, 0xf7, - 0x2c2, 0x2c5, - 0x2d2, 0x2df, - 0x2e5, 0x2eb, - 0x2ed, 0x2ed, - 0x2ef, 0x2ff, - 0x375, 0x375, - 0x384, 0x385, - 0x3f6, 0x3f6, - 0x482, 0x482, - 0x58d, 0x58f, - 0x606, 0x608, - 0x60b, 0x60b, - 0x60e, 0x60f, - 0x6de, 0x6de, - 0x6e9, 0x6e9, - 0x6fd, 0x6fe, - 0x7f6, 0x7f6, - 0x9f2, 0x9f3, - 0x9fa, 0x9fb, - 0xaf1, 0xaf1, - 0xb70, 0xb70, - 0xbf3, 0xbfa, - 0xc7f, 0xc7f, - 0xd79, 0xd79, - 0xe3f, 0xe3f, - 0xf01, 0xf03, - 0xf13, 0xf13, - 0xf15, 0xf17, - 0xf1a, 0xf1f, - 0xf34, 0xf34, - 0xf36, 0xf36, - 0xf38, 0xf38, - 0xfbe, 0xfc5, - 0xfc7, 0xfcc, - 0xfce, 0xfcf, - 0xfd5, 0xfd8, - 0x109e, 0x109f, - 0x1390, 0x1399, - 0x17db, 0x17db, - 0x1940, 0x1940, - 0x19de, 0x19ff, - 0x1b61, 0x1b6a, - 0x1b74, 0x1b7c, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x20a0, 0x20bd, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2118, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x213a, 0x213b, - 0x2140, 0x2144, - 0x214a, 0x214d, - 0x214f, 0x214f, - 0x2190, 0x2307, - 0x230c, 0x2328, - 0x232b, 0x23fa, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x2767, - 0x2794, 0x27c4, - 0x27c7, 0x27e5, - 0x27f0, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2b73, - 0x2b76, 0x2b95, - 0x2b98, 0x2bb9, - 0x2bbd, 0x2bc8, - 0x2bca, 0x2bd1, - 0x2ce5, 0x2cea, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x309b, 0x309c, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31e3, - 0x3200, 0x321e, - 0x322a, 0x3247, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa700, 0xa716, - 0xa720, 0xa721, - 0xa789, 0xa78a, - 0xa828, 0xa82b, - 0xa836, 0xa839, - 0xaa77, 0xaa79, - 0xab5b, 0xab5b, - 0xfb29, 0xfb29, - 0xfbb2, 0xfbc1, - 0xfdfc, 0xfdfd, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe0, 0xffe6, - 0xffe8, 0xffee, - 0xfffc, 0xfffd, - 0x10137, 0x1013f, - 0x10179, 0x10189, - 0x1018c, 0x1018c, - 0x10190, 0x1019b, - 0x101a0, 0x101a0, - 0x101d0, 0x101fc, - 0x10877, 0x10878, - 0x10ac8, 0x10ac8, - 0x16b3c, 0x16b3f, - 0x16b45, 0x16b45, - 0x1bc9c, 0x1bc9c, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d129, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3, - 0x1eef0, 0x1eef1, - 0x1f000, 0x1f02b, - 0x1f030, 0x1f093, - 0x1f0a0, 0x1f0ae, - 0x1f0b1, 0x1f0bf, - 0x1f0c1, 0x1f0cf, - 0x1f0d1, 0x1f0f5, - 0x1f110, 0x1f12e, - 0x1f130, 0x1f16b, - 0x1f170, 0x1f19a, - 0x1f1e6, 0x1f202, - 0x1f210, 0x1f23a, - 0x1f240, 0x1f248, - 0x1f250, 0x1f251, - 0x1f300, 0x1f32c, - 0x1f330, 0x1f37d, - 0x1f380, 0x1f3ce, - 0x1f3d4, 0x1f3f7, - 0x1f400, 0x1f4fe, - 0x1f500, 0x1f54a, - 0x1f550, 0x1f579, - 0x1f57b, 0x1f5a3, - 0x1f5a5, 0x1f642, - 0x1f645, 0x1f6cf, - 0x1f6e0, 0x1f6ec, - 0x1f6f0, 0x1f6f3, - 0x1f700, 0x1f773, - 0x1f780, 0x1f7d4, - 0x1f800, 0x1f80b, - 0x1f810, 0x1f847, - 0x1f850, 0x1f859, - 0x1f860, 0x1f887, - 0x1f890, 0x1f8ad -}; -UCP_FN(S) - -static const unichar ucp_Sc_def[] = { - 0x24, 0x24, - 0xa2, 0xa5, - 0x58f, 0x58f, - 0x60b, 0x60b, - 0x9f2, 0x9f3, - 0x9fb, 0x9fb, - 0xaf1, 0xaf1, - 0xbf9, 0xbf9, - 0xe3f, 0xe3f, - 0x17db, 0x17db, - 0x20a0, 0x20bd, - 0xa838, 0xa838, - 0xfdfc, 0xfdfc, - 0xfe69, 0xfe69, - 0xff04, 0xff04, - 0xffe0, 0xffe1, - 0xffe5, 0xffe6 -}; -UCP_FN(Sc) - -static const unichar ucp_Sk_def[] = { - 0x5e, 0x5e, - 0x60, 0x60, - 0xa8, 0xa8, - 0xaf, 0xaf, - 0xb4, 0xb4, - 0xb8, 0xb8, - 0x2c2, 0x2c5, - 0x2d2, 0x2df, - 0x2e5, 0x2eb, - 0x2ed, 0x2ed, - 0x2ef, 0x2ff, - 0x375, 0x375, - 0x384, 0x385, - 0x1fbd, 0x1fbd, - 0x1fbf, 0x1fc1, - 0x1fcd, 0x1fcf, - 0x1fdd, 0x1fdf, - 0x1fed, 0x1fef, - 0x1ffd, 0x1ffe, - 0x309b, 0x309c, - 0xa700, 0xa716, - 0xa720, 0xa721, - 0xa789, 0xa78a, - 0xab5b, 0xab5b, - 0xfbb2, 0xfbc1, - 0xff3e, 0xff3e, - 0xff40, 0xff40, - 0xffe3, 0xffe3 -}; -UCP_FN(Sk) - -static const unichar ucp_Sm_def[] = { - 0x2b, 0x2b, - 0x3c, 0x3e, - 0x7c, 0x7c, - 0x7e, 0x7e, - 0xac, 0xac, - 0xb1, 0xb1, - 0xd7, 0xd7, - 0xf7, 0xf7, - 0x3f6, 0x3f6, - 0x606, 0x608, - 0x2044, 0x2044, - 0x2052, 0x2052, - 0x207a, 0x207c, - 0x208a, 0x208c, - 0x2118, 0x2118, - 0x2140, 0x2144, - 0x214b, 0x214b, - 0x2190, 0x2194, - 0x219a, 0x219b, - 0x21a0, 0x21a0, - 0x21a3, 0x21a3, - 0x21a6, 0x21a6, - 0x21ae, 0x21ae, - 0x21ce, 0x21cf, - 0x21d2, 0x21d2, - 0x21d4, 0x21d4, - 0x21f4, 0x22ff, - 0x2320, 0x2321, - 0x237c, 0x237c, - 0x239b, 0x23b3, - 0x23dc, 0x23e1, - 0x25b7, 0x25b7, - 0x25c1, 0x25c1, - 0x25f8, 0x25ff, - 0x266f, 0x266f, - 0x27c0, 0x27c4, - 0x27c7, 0x27e5, - 0x27f0, 0x27ff, - 0x2900, 0x2982, - 0x2999, 0x29d7, - 0x29dc, 0x29fb, - 0x29fe, 0x2aff, - 0x2b30, 0x2b44, - 0x2b47, 0x2b4c, - 0xfb29, 0xfb29, - 0xfe62, 0xfe62, - 0xfe64, 0xfe66, - 0xff0b, 0xff0b, - 0xff1c, 0xff1e, - 0xff5c, 0xff5c, - 0xff5e, 0xff5e, - 0xffe2, 0xffe2, - 0xffe9, 0xffec, - 0x1d6c1, 0x1d6c1, - 0x1d6db, 0x1d6db, - 0x1d6fb, 0x1d6fb, - 0x1d715, 0x1d715, - 0x1d735, 0x1d735, - 0x1d74f, 0x1d74f, - 0x1d76f, 0x1d76f, - 0x1d789, 0x1d789, - 0x1d7a9, 0x1d7a9, - 0x1d7c3, 0x1d7c3, - 0x1eef0, 0x1eef1 -}; -UCP_FN(Sm) - -static const unichar ucp_So_def[] = { - 0xa6, 0xa6, - 0xa9, 0xa9, - 0xae, 0xae, - 0xb0, 0xb0, - 0x482, 0x482, - 0x58d, 0x58e, - 0x60e, 0x60f, - 0x6de, 0x6de, - 0x6e9, 0x6e9, - 0x6fd, 0x6fe, - 0x7f6, 0x7f6, - 0x9fa, 0x9fa, - 0xb70, 0xb70, - 0xbf3, 0xbf8, - 0xbfa, 0xbfa, - 0xc7f, 0xc7f, - 0xd79, 0xd79, - 0xf01, 0xf03, - 0xf13, 0xf13, - 0xf15, 0xf17, - 0xf1a, 0xf1f, - 0xf34, 0xf34, - 0xf36, 0xf36, - 0xf38, 0xf38, - 0xfbe, 0xfc5, - 0xfc7, 0xfcc, - 0xfce, 0xfcf, - 0xfd5, 0xfd8, - 0x109e, 0x109f, - 0x1390, 0x1399, - 0x1940, 0x1940, - 0x19de, 0x19ff, - 0x1b61, 0x1b6a, - 0x1b74, 0x1b7c, - 0x2100, 0x2101, - 0x2103, 0x2106, - 0x2108, 0x2109, - 0x2114, 0x2114, - 0x2116, 0x2117, - 0x211e, 0x2123, - 0x2125, 0x2125, - 0x2127, 0x2127, - 0x2129, 0x2129, - 0x212e, 0x212e, - 0x213a, 0x213b, - 0x214a, 0x214a, - 0x214c, 0x214d, - 0x214f, 0x214f, - 0x2195, 0x2199, - 0x219c, 0x219f, - 0x21a1, 0x21a2, - 0x21a4, 0x21a5, - 0x21a7, 0x21ad, - 0x21af, 0x21cd, - 0x21d0, 0x21d1, - 0x21d3, 0x21d3, - 0x21d5, 0x21f3, - 0x2300, 0x2307, - 0x230c, 0x231f, - 0x2322, 0x2328, - 0x232b, 0x237b, - 0x237d, 0x239a, - 0x23b4, 0x23db, - 0x23e2, 0x23fa, - 0x2400, 0x2426, - 0x2440, 0x244a, - 0x249c, 0x24e9, - 0x2500, 0x25b6, - 0x25b8, 0x25c0, - 0x25c2, 0x25f7, - 0x2600, 0x266e, - 0x2670, 0x2767, - 0x2794, 0x27bf, - 0x2800, 0x28ff, - 0x2b00, 0x2b2f, - 0x2b45, 0x2b46, - 0x2b4d, 0x2b73, - 0x2b76, 0x2b95, - 0x2b98, 0x2bb9, - 0x2bbd, 0x2bc8, - 0x2bca, 0x2bd1, - 0x2ce5, 0x2cea, - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x2ff0, 0x2ffb, - 0x3004, 0x3004, - 0x3012, 0x3013, - 0x3020, 0x3020, - 0x3036, 0x3037, - 0x303e, 0x303f, - 0x3190, 0x3191, - 0x3196, 0x319f, - 0x31c0, 0x31e3, - 0x3200, 0x321e, - 0x322a, 0x3247, - 0x3250, 0x3250, - 0x3260, 0x327f, - 0x328a, 0x32b0, - 0x32c0, 0x32fe, - 0x3300, 0x33ff, - 0x4dc0, 0x4dff, - 0xa490, 0xa4c6, - 0xa828, 0xa82b, - 0xa836, 0xa837, - 0xa839, 0xa839, - 0xaa77, 0xaa79, - 0xfdfd, 0xfdfd, - 0xffe4, 0xffe4, - 0xffe8, 0xffe8, - 0xffed, 0xffee, - 0xfffc, 0xfffd, - 0x10137, 0x1013f, - 0x10179, 0x10189, - 0x1018c, 0x1018c, - 0x10190, 0x1019b, - 0x101a0, 0x101a0, - 0x101d0, 0x101fc, - 0x10877, 0x10878, - 0x10ac8, 0x10ac8, - 0x16b3c, 0x16b3f, - 0x16b45, 0x16b45, - 0x1bc9c, 0x1bc9c, - 0x1d000, 0x1d0f5, - 0x1d100, 0x1d126, - 0x1d129, 0x1d164, - 0x1d16a, 0x1d16c, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1dd, - 0x1d200, 0x1d241, - 0x1d245, 0x1d245, - 0x1d300, 0x1d356, - 0x1f000, 0x1f02b, - 0x1f030, 0x1f093, - 0x1f0a0, 0x1f0ae, - 0x1f0b1, 0x1f0bf, - 0x1f0c1, 0x1f0cf, - 0x1f0d1, 0x1f0f5, - 0x1f110, 0x1f12e, - 0x1f130, 0x1f16b, - 0x1f170, 0x1f19a, - 0x1f1e6, 0x1f202, - 0x1f210, 0x1f23a, - 0x1f240, 0x1f248, - 0x1f250, 0x1f251, - 0x1f300, 0x1f32c, - 0x1f330, 0x1f37d, - 0x1f380, 0x1f3ce, - 0x1f3d4, 0x1f3f7, - 0x1f400, 0x1f4fe, - 0x1f500, 0x1f54a, - 0x1f550, 0x1f579, - 0x1f57b, 0x1f5a3, - 0x1f5a5, 0x1f642, - 0x1f645, 0x1f6cf, - 0x1f6e0, 0x1f6ec, - 0x1f6f0, 0x1f6f3, - 0x1f700, 0x1f773, - 0x1f780, 0x1f7d4, - 0x1f800, 0x1f80b, - 0x1f810, 0x1f847, - 0x1f850, 0x1f859, - 0x1f860, 0x1f887, - 0x1f890, 0x1f8ad -}; -UCP_FN(So) - -static const unichar ucp_Xan_def[] = { - 0x30, 0x39, - 0x41, 0x5a, - 0x61, 0x7a, - 0xaa, 0xaa, - 0xb2, 0xb3, - 0xb5, 0xb5, - 0xb9, 0xba, - 0xbc, 0xbe, - 0xc0, 0xd6, - 0xd8, 0xf6, - 0xf8, 0x2c1, - 0x2c6, 0x2d1, - 0x2e0, 0x2e4, - 0x2ec, 0x2ec, - 0x2ee, 0x2ee, - 0x370, 0x374, - 0x376, 0x377, - 0x37a, 0x37d, - 0x37f, 0x37f, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x3a1, - 0x3a3, 0x3f5, - 0x3f7, 0x481, - 0x48a, 0x52f, - 0x531, 0x556, - 0x559, 0x559, - 0x561, 0x587, - 0x5d0, 0x5ea, - 0x5f0, 0x5f2, - 0x620, 0x64a, - 0x660, 0x669, - 0x66e, 0x66f, - 0x671, 0x6d3, - 0x6d5, 0x6d5, - 0x6e5, 0x6e6, - 0x6ee, 0x6fc, - 0x6ff, 0x6ff, - 0x710, 0x710, - 0x712, 0x72f, - 0x74d, 0x7a5, - 0x7b1, 0x7b1, - 0x7c0, 0x7ea, - 0x7f4, 0x7f5, - 0x7fa, 0x7fa, - 0x800, 0x815, - 0x81a, 0x81a, - 0x824, 0x824, - 0x828, 0x828, - 0x840, 0x858, - 0x8a0, 0x8b2, - 0x904, 0x939, - 0x93d, 0x93d, - 0x950, 0x950, - 0x958, 0x961, - 0x966, 0x96f, - 0x971, 0x980, - 0x985, 0x98c, - 0x98f, 0x990, - 0x993, 0x9a8, - 0x9aa, 0x9b0, - 0x9b2, 0x9b2, - 0x9b6, 0x9b9, - 0x9bd, 0x9bd, - 0x9ce, 0x9ce, - 0x9dc, 0x9dd, - 0x9df, 0x9e1, - 0x9e6, 0x9f1, - 0x9f4, 0x9f9, - 0xa05, 0xa0a, - 0xa0f, 0xa10, - 0xa13, 0xa28, - 0xa2a, 0xa30, - 0xa32, 0xa33, - 0xa35, 0xa36, - 0xa38, 0xa39, - 0xa59, 0xa5c, - 0xa5e, 0xa5e, - 0xa66, 0xa6f, - 0xa72, 0xa74, - 0xa85, 0xa8d, - 0xa8f, 0xa91, - 0xa93, 0xaa8, - 0xaaa, 0xab0, - 0xab2, 0xab3, - 0xab5, 0xab9, - 0xabd, 0xabd, - 0xad0, 0xad0, - 0xae0, 0xae1, - 0xae6, 0xaef, - 0xb05, 0xb0c, - 0xb0f, 0xb10, - 0xb13, 0xb28, - 0xb2a, 0xb30, - 0xb32, 0xb33, - 0xb35, 0xb39, - 0xb3d, 0xb3d, - 0xb5c, 0xb5d, - 0xb5f, 0xb61, - 0xb66, 0xb6f, - 0xb71, 0xb77, - 0xb83, 0xb83, - 0xb85, 0xb8a, - 0xb8e, 0xb90, - 0xb92, 0xb95, - 0xb99, 0xb9a, - 0xb9c, 0xb9c, - 0xb9e, 0xb9f, - 0xba3, 0xba4, - 0xba8, 0xbaa, - 0xbae, 0xbb9, - 0xbd0, 0xbd0, - 0xbe6, 0xbf2, - 0xc05, 0xc0c, - 0xc0e, 0xc10, - 0xc12, 0xc28, - 0xc2a, 0xc39, - 0xc3d, 0xc3d, - 0xc58, 0xc59, - 0xc60, 0xc61, - 0xc66, 0xc6f, - 0xc78, 0xc7e, - 0xc85, 0xc8c, - 0xc8e, 0xc90, - 0xc92, 0xca8, - 0xcaa, 0xcb3, - 0xcb5, 0xcb9, - 0xcbd, 0xcbd, - 0xcde, 0xcde, - 0xce0, 0xce1, - 0xce6, 0xcef, - 0xcf1, 0xcf2, - 0xd05, 0xd0c, - 0xd0e, 0xd10, - 0xd12, 0xd3a, - 0xd3d, 0xd3d, - 0xd4e, 0xd4e, - 0xd60, 0xd61, - 0xd66, 0xd75, - 0xd7a, 0xd7f, - 0xd85, 0xd96, - 0xd9a, 0xdb1, - 0xdb3, 0xdbb, - 0xdbd, 0xdbd, - 0xdc0, 0xdc6, - 0xde6, 0xdef, - 0xe01, 0xe30, - 0xe32, 0xe33, - 0xe40, 0xe46, - 0xe50, 0xe59, - 0xe81, 0xe82, - 0xe84, 0xe84, - 0xe87, 0xe88, - 0xe8a, 0xe8a, - 0xe8d, 0xe8d, - 0xe94, 0xe97, - 0xe99, 0xe9f, - 0xea1, 0xea3, - 0xea5, 0xea5, - 0xea7, 0xea7, - 0xeaa, 0xeab, - 0xead, 0xeb0, - 0xeb2, 0xeb3, - 0xebd, 0xebd, - 0xec0, 0xec4, - 0xec6, 0xec6, - 0xed0, 0xed9, - 0xedc, 0xedf, - 0xf00, 0xf00, - 0xf20, 0xf33, - 0xf40, 0xf47, - 0xf49, 0xf6c, - 0xf88, 0xf8c, - 0x1000, 0x102a, - 0x103f, 0x1049, - 0x1050, 0x1055, - 0x105a, 0x105d, - 0x1061, 0x1061, - 0x1065, 0x1066, - 0x106e, 0x1070, - 0x1075, 0x1081, - 0x108e, 0x108e, - 0x1090, 0x1099, - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x10d0, 0x10fa, - 0x10fc, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1369, 0x137c, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x167f, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f8, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dc, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x18aa, 0x18aa, - 0x18b0, 0x18f5, - 0x1900, 0x191e, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19ab, - 0x19c1, 0x19c7, - 0x19d0, 0x19da, - 0x1a00, 0x1a16, - 0x1a20, 0x1a54, - 0x1a80, 0x1a89, - 0x1a90, 0x1a99, - 0x1aa7, 0x1aa7, - 0x1b05, 0x1b33, - 0x1b45, 0x1b4b, - 0x1b50, 0x1b59, - 0x1b83, 0x1ba0, - 0x1bae, 0x1be5, - 0x1c00, 0x1c23, - 0x1c40, 0x1c49, - 0x1c4d, 0x1c7d, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf1, - 0x1cf5, 0x1cf6, - 0x1d00, 0x1dbf, - 0x1e00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x2090, 0x209c, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x214e, 0x214e, - 0x2150, 0x2189, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c60, 0x2ce4, - 0x2ceb, 0x2cee, - 0x2cf2, 0x2cf3, - 0x2cfd, 0x2cfd, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d, - 0x2d30, 0x2d67, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e2f, 0x2e2f, - 0x3005, 0x3007, - 0x3021, 0x3029, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312d, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31ba, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3248, 0x324f, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xa000, 0xa48c, - 0xa4d0, 0xa4fd, - 0xa500, 0xa60c, - 0xa610, 0xa62b, - 0xa640, 0xa66e, - 0xa67f, 0xa69d, - 0xa6a0, 0xa6ef, - 0xa717, 0xa71f, - 0xa722, 0xa788, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7f7, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xa830, 0xa835, - 0xa840, 0xa873, - 0xa882, 0xa8b3, - 0xa8d0, 0xa8d9, - 0xa8f2, 0xa8f7, - 0xa8fb, 0xa8fb, - 0xa900, 0xa925, - 0xa930, 0xa946, - 0xa960, 0xa97c, - 0xa984, 0xa9b2, - 0xa9cf, 0xa9d9, - 0xa9e0, 0xa9e4, - 0xa9e6, 0xa9fe, - 0xaa00, 0xaa28, - 0xaa40, 0xaa42, - 0xaa44, 0xaa4b, - 0xaa50, 0xaa59, - 0xaa60, 0xaa76, - 0xaa7a, 0xaa7a, - 0xaa7e, 0xaaaf, - 0xaab1, 0xaab1, - 0xaab5, 0xaab6, - 0xaab9, 0xaabd, - 0xaac0, 0xaac0, - 0xaac2, 0xaac2, - 0xaadb, 0xaadd, - 0xaae0, 0xaaea, - 0xaaf2, 0xaaf4, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e, - 0xab30, 0xab5a, - 0xab5c, 0xab5f, - 0xab64, 0xab65, - 0xabc0, 0xabe2, - 0xabf0, 0xabf9, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018b, - 0x10280, 0x1029c, - 0x102a0, 0x102d0, - 0x102e1, 0x102fb, - 0x10300, 0x10323, - 0x10330, 0x1034a, - 0x10350, 0x10375, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x103d1, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10500, 0x10527, - 0x10530, 0x10563, - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x10855, - 0x10858, 0x10876, - 0x10879, 0x1089e, - 0x108a7, 0x108af, - 0x10900, 0x1091b, - 0x10920, 0x10939, - 0x10980, 0x109b7, - 0x109be, 0x109bf, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a40, 0x10a47, - 0x10a60, 0x10a7e, - 0x10a80, 0x10a9f, - 0x10ac0, 0x10ac7, - 0x10ac9, 0x10ae4, - 0x10aeb, 0x10aef, - 0x10b00, 0x10b35, - 0x10b40, 0x10b55, - 0x10b58, 0x10b72, - 0x10b78, 0x10b91, - 0x10ba9, 0x10baf, - 0x10c00, 0x10c48, - 0x10e60, 0x10e7e, - 0x11003, 0x11037, - 0x11052, 0x1106f, - 0x11083, 0x110af, - 0x110d0, 0x110e8, - 0x110f0, 0x110f9, - 0x11103, 0x11126, - 0x11136, 0x1113f, - 0x11150, 0x11172, - 0x11176, 0x11176, - 0x11183, 0x111b2, - 0x111c1, 0x111c4, - 0x111d0, 0x111da, - 0x111e1, 0x111f4, - 0x11200, 0x11211, - 0x11213, 0x1122b, - 0x112b0, 0x112de, - 0x112f0, 0x112f9, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1133d, 0x1133d, - 0x1135d, 0x11361, - 0x11480, 0x114af, - 0x114c4, 0x114c5, - 0x114c7, 0x114c7, - 0x114d0, 0x114d9, - 0x11580, 0x115ae, - 0x11600, 0x1162f, - 0x11644, 0x11644, - 0x11650, 0x11659, - 0x11680, 0x116aa, - 0x116c0, 0x116c9, - 0x118a0, 0x118f2, - 0x118ff, 0x118ff, - 0x11ac0, 0x11af8, - 0x12000, 0x12398, - 0x12400, 0x1246e, - 0x13000, 0x1342e, - 0x16800, 0x16a38, - 0x16a40, 0x16a5e, - 0x16a60, 0x16a69, - 0x16ad0, 0x16aed, - 0x16b00, 0x16b2f, - 0x16b40, 0x16b43, - 0x16b50, 0x16b59, - 0x16b5b, 0x16b61, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f, - 0x16f00, 0x16f44, - 0x16f50, 0x16f50, - 0x16f93, 0x16f9f, - 0x1b000, 0x1b001, - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1d360, 0x1d371, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7cb, - 0x1d7ce, 0x1d7ff, - 0x1e800, 0x1e8c4, - 0x1e8c7, 0x1e8cf, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee24, 0x1ee24, - 0x1ee27, 0x1ee27, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee39, 0x1ee39, - 0x1ee3b, 0x1ee3b, - 0x1ee42, 0x1ee42, - 0x1ee47, 0x1ee47, - 0x1ee49, 0x1ee49, - 0x1ee4b, 0x1ee4b, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee54, 0x1ee54, - 0x1ee57, 0x1ee57, - 0x1ee59, 0x1ee59, - 0x1ee5b, 0x1ee5b, - 0x1ee5d, 0x1ee5d, - 0x1ee5f, 0x1ee5f, - 0x1ee61, 0x1ee62, - 0x1ee64, 0x1ee64, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee7e, 0x1ee7e, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x1f100, 0x1f10c, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d -}; -UCP_FN(Xan) - -static const unichar ucp_Xps_def[] = { - 0x9, 0xd, - 0x20, 0x20, - 0xa0, 0xa0, - 0x1680, 0x1680, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; -UCP_FN(Xps) - -static const unichar ucp_Xsp_def[] = { - 0x9, 0xa, - 0xc, 0xd, - 0x20, 0x20, - 0xa0, 0xa0, - 0x1680, 0x1680, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; -UCP_FN(Xsp) - -static const unichar ucp_Xwd_def[] = { - 0x30, 0x39, - 0x41, 0x5a, - 0x5f, 0x5f, - 0x61, 0x7a, - 0xaa, 0xaa, - 0xb2, 0xb3, - 0xb5, 0xb5, - 0xb9, 0xba, - 0xbc, 0xbe, - 0xc0, 0xd6, - 0xd8, 0xf6, - 0xf8, 0x2c1, - 0x2c6, 0x2d1, - 0x2e0, 0x2e4, - 0x2ec, 0x2ec, - 0x2ee, 0x2ee, - 0x370, 0x374, - 0x376, 0x377, - 0x37a, 0x37d, - 0x37f, 0x37f, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x3a1, - 0x3a3, 0x3f5, - 0x3f7, 0x481, - 0x48a, 0x52f, - 0x531, 0x556, - 0x559, 0x559, - 0x561, 0x587, - 0x5d0, 0x5ea, - 0x5f0, 0x5f2, - 0x620, 0x64a, - 0x660, 0x669, - 0x66e, 0x66f, - 0x671, 0x6d3, - 0x6d5, 0x6d5, - 0x6e5, 0x6e6, - 0x6ee, 0x6fc, - 0x6ff, 0x6ff, - 0x710, 0x710, - 0x712, 0x72f, - 0x74d, 0x7a5, - 0x7b1, 0x7b1, - 0x7c0, 0x7ea, - 0x7f4, 0x7f5, - 0x7fa, 0x7fa, - 0x800, 0x815, - 0x81a, 0x81a, - 0x824, 0x824, - 0x828, 0x828, - 0x840, 0x858, - 0x8a0, 0x8b2, - 0x904, 0x939, - 0x93d, 0x93d, - 0x950, 0x950, - 0x958, 0x961, - 0x966, 0x96f, - 0x971, 0x980, - 0x985, 0x98c, - 0x98f, 0x990, - 0x993, 0x9a8, - 0x9aa, 0x9b0, - 0x9b2, 0x9b2, - 0x9b6, 0x9b9, - 0x9bd, 0x9bd, - 0x9ce, 0x9ce, - 0x9dc, 0x9dd, - 0x9df, 0x9e1, - 0x9e6, 0x9f1, - 0x9f4, 0x9f9, - 0xa05, 0xa0a, - 0xa0f, 0xa10, - 0xa13, 0xa28, - 0xa2a, 0xa30, - 0xa32, 0xa33, - 0xa35, 0xa36, - 0xa38, 0xa39, - 0xa59, 0xa5c, - 0xa5e, 0xa5e, - 0xa66, 0xa6f, - 0xa72, 0xa74, - 0xa85, 0xa8d, - 0xa8f, 0xa91, - 0xa93, 0xaa8, - 0xaaa, 0xab0, - 0xab2, 0xab3, - 0xab5, 0xab9, - 0xabd, 0xabd, - 0xad0, 0xad0, - 0xae0, 0xae1, - 0xae6, 0xaef, - 0xb05, 0xb0c, - 0xb0f, 0xb10, - 0xb13, 0xb28, - 0xb2a, 0xb30, - 0xb32, 0xb33, - 0xb35, 0xb39, - 0xb3d, 0xb3d, - 0xb5c, 0xb5d, - 0xb5f, 0xb61, - 0xb66, 0xb6f, - 0xb71, 0xb77, - 0xb83, 0xb83, - 0xb85, 0xb8a, - 0xb8e, 0xb90, - 0xb92, 0xb95, - 0xb99, 0xb9a, - 0xb9c, 0xb9c, - 0xb9e, 0xb9f, - 0xba3, 0xba4, - 0xba8, 0xbaa, - 0xbae, 0xbb9, - 0xbd0, 0xbd0, - 0xbe6, 0xbf2, - 0xc05, 0xc0c, - 0xc0e, 0xc10, - 0xc12, 0xc28, - 0xc2a, 0xc39, - 0xc3d, 0xc3d, - 0xc58, 0xc59, - 0xc60, 0xc61, - 0xc66, 0xc6f, - 0xc78, 0xc7e, - 0xc85, 0xc8c, - 0xc8e, 0xc90, - 0xc92, 0xca8, - 0xcaa, 0xcb3, - 0xcb5, 0xcb9, - 0xcbd, 0xcbd, - 0xcde, 0xcde, - 0xce0, 0xce1, - 0xce6, 0xcef, - 0xcf1, 0xcf2, - 0xd05, 0xd0c, - 0xd0e, 0xd10, - 0xd12, 0xd3a, - 0xd3d, 0xd3d, - 0xd4e, 0xd4e, - 0xd60, 0xd61, - 0xd66, 0xd75, - 0xd7a, 0xd7f, - 0xd85, 0xd96, - 0xd9a, 0xdb1, - 0xdb3, 0xdbb, - 0xdbd, 0xdbd, - 0xdc0, 0xdc6, - 0xde6, 0xdef, - 0xe01, 0xe30, - 0xe32, 0xe33, - 0xe40, 0xe46, - 0xe50, 0xe59, - 0xe81, 0xe82, - 0xe84, 0xe84, - 0xe87, 0xe88, - 0xe8a, 0xe8a, - 0xe8d, 0xe8d, - 0xe94, 0xe97, - 0xe99, 0xe9f, - 0xea1, 0xea3, - 0xea5, 0xea5, - 0xea7, 0xea7, - 0xeaa, 0xeab, - 0xead, 0xeb0, - 0xeb2, 0xeb3, - 0xebd, 0xebd, - 0xec0, 0xec4, - 0xec6, 0xec6, - 0xed0, 0xed9, - 0xedc, 0xedf, - 0xf00, 0xf00, - 0xf20, 0xf33, - 0xf40, 0xf47, - 0xf49, 0xf6c, - 0xf88, 0xf8c, - 0x1000, 0x102a, - 0x103f, 0x1049, - 0x1050, 0x1055, - 0x105a, 0x105d, - 0x1061, 0x1061, - 0x1065, 0x1066, - 0x106e, 0x1070, - 0x1075, 0x1081, - 0x108e, 0x108e, - 0x1090, 0x1099, - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x10d0, 0x10fa, - 0x10fc, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x1369, 0x137c, - 0x1380, 0x138f, - 0x13a0, 0x13f4, - 0x1401, 0x166c, - 0x166f, 0x167f, - 0x1681, 0x169a, - 0x16a0, 0x16ea, - 0x16ee, 0x16f8, - 0x1700, 0x170c, - 0x170e, 0x1711, - 0x1720, 0x1731, - 0x1740, 0x1751, - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1780, 0x17b3, - 0x17d7, 0x17d7, - 0x17dc, 0x17dc, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18a8, - 0x18aa, 0x18aa, - 0x18b0, 0x18f5, - 0x1900, 0x191e, - 0x1946, 0x196d, - 0x1970, 0x1974, - 0x1980, 0x19ab, - 0x19c1, 0x19c7, - 0x19d0, 0x19da, - 0x1a00, 0x1a16, - 0x1a20, 0x1a54, - 0x1a80, 0x1a89, - 0x1a90, 0x1a99, - 0x1aa7, 0x1aa7, - 0x1b05, 0x1b33, - 0x1b45, 0x1b4b, - 0x1b50, 0x1b59, - 0x1b83, 0x1ba0, - 0x1bae, 0x1be5, - 0x1c00, 0x1c23, - 0x1c40, 0x1c49, - 0x1c4d, 0x1c7d, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf1, - 0x1cf5, 0x1cf6, - 0x1d00, 0x1dbf, - 0x1e00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fbc, - 0x1fbe, 0x1fbe, - 0x1fc2, 0x1fc4, - 0x1fc6, 0x1fcc, - 0x1fd0, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fe0, 0x1fec, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffc, - 0x2070, 0x2071, - 0x2074, 0x2079, - 0x207f, 0x2089, - 0x2090, 0x209c, - 0x2102, 0x2102, - 0x2107, 0x2107, - 0x210a, 0x2113, - 0x2115, 0x2115, - 0x2119, 0x211d, - 0x2124, 0x2124, - 0x2126, 0x2126, - 0x2128, 0x2128, - 0x212a, 0x212d, - 0x212f, 0x2139, - 0x213c, 0x213f, - 0x2145, 0x2149, - 0x214e, 0x214e, - 0x2150, 0x2189, - 0x2460, 0x249b, - 0x24ea, 0x24ff, - 0x2776, 0x2793, - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e, - 0x2c60, 0x2ce4, - 0x2ceb, 0x2cee, - 0x2cf2, 0x2cf3, - 0x2cfd, 0x2cfd, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d, - 0x2d30, 0x2d67, - 0x2d6f, 0x2d6f, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0x2e2f, 0x2e2f, - 0x3005, 0x3007, - 0x3021, 0x3029, - 0x3031, 0x3035, - 0x3038, 0x303c, - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x30a1, 0x30fa, - 0x30fc, 0x30ff, - 0x3105, 0x312d, - 0x3131, 0x318e, - 0x3192, 0x3195, - 0x31a0, 0x31ba, - 0x31f0, 0x31ff, - 0x3220, 0x3229, - 0x3248, 0x324f, - 0x3251, 0x325f, - 0x3280, 0x3289, - 0x32b1, 0x32bf, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xa000, 0xa48c, - 0xa4d0, 0xa4fd, - 0xa500, 0xa60c, - 0xa610, 0xa62b, - 0xa640, 0xa66e, - 0xa67f, 0xa69d, - 0xa6a0, 0xa6ef, - 0xa717, 0xa71f, - 0xa722, 0xa788, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7f7, 0xa801, - 0xa803, 0xa805, - 0xa807, 0xa80a, - 0xa80c, 0xa822, - 0xa830, 0xa835, - 0xa840, 0xa873, - 0xa882, 0xa8b3, - 0xa8d0, 0xa8d9, - 0xa8f2, 0xa8f7, - 0xa8fb, 0xa8fb, - 0xa900, 0xa925, - 0xa930, 0xa946, - 0xa960, 0xa97c, - 0xa984, 0xa9b2, - 0xa9cf, 0xa9d9, - 0xa9e0, 0xa9e4, - 0xa9e6, 0xa9fe, - 0xaa00, 0xaa28, - 0xaa40, 0xaa42, - 0xaa44, 0xaa4b, - 0xaa50, 0xaa59, - 0xaa60, 0xaa76, - 0xaa7a, 0xaa7a, - 0xaa7e, 0xaaaf, - 0xaab1, 0xaab1, - 0xaab5, 0xaab6, - 0xaab9, 0xaabd, - 0xaac0, 0xaac0, - 0xaac2, 0xaac2, - 0xaadb, 0xaadd, - 0xaae0, 0xaaea, - 0xaaf2, 0xaaf4, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e, - 0xab30, 0xab5a, - 0xab5c, 0xab5f, - 0xab64, 0xab65, - 0xabc0, 0xabe2, - 0xabf0, 0xabf9, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0xfb00, 0xfb06, - 0xfb13, 0xfb17, - 0xfb1d, 0xfb1d, - 0xfb1f, 0xfb28, - 0xfb2a, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfbb1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfb, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0xff10, 0xff19, - 0xff21, 0xff3a, - 0xff41, 0xff5a, - 0xff66, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc, - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa, - 0x10107, 0x10133, - 0x10140, 0x10178, - 0x1018a, 0x1018b, - 0x10280, 0x1029c, - 0x102a0, 0x102d0, - 0x102e1, 0x102fb, - 0x10300, 0x10323, - 0x10330, 0x1034a, - 0x10350, 0x10375, - 0x10380, 0x1039d, - 0x103a0, 0x103c3, - 0x103c8, 0x103cf, - 0x103d1, 0x103d5, - 0x10400, 0x1049d, - 0x104a0, 0x104a9, - 0x10500, 0x10527, - 0x10530, 0x10563, - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767, - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x10855, - 0x10858, 0x10876, - 0x10879, 0x1089e, - 0x108a7, 0x108af, - 0x10900, 0x1091b, - 0x10920, 0x10939, - 0x10980, 0x109b7, - 0x109be, 0x109bf, - 0x10a00, 0x10a00, - 0x10a10, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a40, 0x10a47, - 0x10a60, 0x10a7e, - 0x10a80, 0x10a9f, - 0x10ac0, 0x10ac7, - 0x10ac9, 0x10ae4, - 0x10aeb, 0x10aef, - 0x10b00, 0x10b35, - 0x10b40, 0x10b55, - 0x10b58, 0x10b72, - 0x10b78, 0x10b91, - 0x10ba9, 0x10baf, - 0x10c00, 0x10c48, - 0x10e60, 0x10e7e, - 0x11003, 0x11037, - 0x11052, 0x1106f, - 0x11083, 0x110af, - 0x110d0, 0x110e8, - 0x110f0, 0x110f9, - 0x11103, 0x11126, - 0x11136, 0x1113f, - 0x11150, 0x11172, - 0x11176, 0x11176, - 0x11183, 0x111b2, - 0x111c1, 0x111c4, - 0x111d0, 0x111da, - 0x111e1, 0x111f4, - 0x11200, 0x11211, - 0x11213, 0x1122b, - 0x112b0, 0x112de, - 0x112f0, 0x112f9, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1133d, 0x1133d, - 0x1135d, 0x11361, - 0x11480, 0x114af, - 0x114c4, 0x114c5, - 0x114c7, 0x114c7, - 0x114d0, 0x114d9, - 0x11580, 0x115ae, - 0x11600, 0x1162f, - 0x11644, 0x11644, - 0x11650, 0x11659, - 0x11680, 0x116aa, - 0x116c0, 0x116c9, - 0x118a0, 0x118f2, - 0x118ff, 0x118ff, - 0x11ac0, 0x11af8, - 0x12000, 0x12398, - 0x12400, 0x1246e, - 0x13000, 0x1342e, - 0x16800, 0x16a38, - 0x16a40, 0x16a5e, - 0x16a60, 0x16a69, - 0x16ad0, 0x16aed, - 0x16b00, 0x16b2f, - 0x16b40, 0x16b43, - 0x16b50, 0x16b59, - 0x16b5b, 0x16b61, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f, - 0x16f00, 0x16f44, - 0x16f50, 0x16f50, - 0x16f93, 0x16f9f, - 0x1b000, 0x1b001, - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1d360, 0x1d371, - 0x1d400, 0x1d454, - 0x1d456, 0x1d49c, - 0x1d49e, 0x1d49f, - 0x1d4a2, 0x1d4a2, - 0x1d4a5, 0x1d4a6, - 0x1d4a9, 0x1d4ac, - 0x1d4ae, 0x1d4b9, - 0x1d4bb, 0x1d4bb, - 0x1d4bd, 0x1d4c3, - 0x1d4c5, 0x1d505, - 0x1d507, 0x1d50a, - 0x1d50d, 0x1d514, - 0x1d516, 0x1d51c, - 0x1d51e, 0x1d539, - 0x1d53b, 0x1d53e, - 0x1d540, 0x1d544, - 0x1d546, 0x1d546, - 0x1d54a, 0x1d550, - 0x1d552, 0x1d6a5, - 0x1d6a8, 0x1d6c0, - 0x1d6c2, 0x1d6da, - 0x1d6dc, 0x1d6fa, - 0x1d6fc, 0x1d714, - 0x1d716, 0x1d734, - 0x1d736, 0x1d74e, - 0x1d750, 0x1d76e, - 0x1d770, 0x1d788, - 0x1d78a, 0x1d7a8, - 0x1d7aa, 0x1d7c2, - 0x1d7c4, 0x1d7cb, - 0x1d7ce, 0x1d7ff, - 0x1e800, 0x1e8c4, - 0x1e8c7, 0x1e8cf, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee24, 0x1ee24, - 0x1ee27, 0x1ee27, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee39, 0x1ee39, - 0x1ee3b, 0x1ee3b, - 0x1ee42, 0x1ee42, - 0x1ee47, 0x1ee47, - 0x1ee49, 0x1ee49, - 0x1ee4b, 0x1ee4b, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee54, 0x1ee54, - 0x1ee57, 0x1ee57, - 0x1ee59, 0x1ee59, - 0x1ee5b, 0x1ee5b, - 0x1ee5d, 0x1ee5d, - 0x1ee5f, 0x1ee5f, - 0x1ee61, 0x1ee62, - 0x1ee64, 0x1ee64, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee7e, 0x1ee7e, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x1f100, 0x1f10c, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d -}; -UCP_FN(Xwd) - -static const unichar ucp_Z_def[] = { - 0x20, 0x20, - 0xa0, 0xa0, - 0x1680, 0x1680, - 0x2000, 0x200a, - 0x2028, 0x2029, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; -UCP_FN(Z) - -static const unichar ucp_Zl_def[] = { - 0x2028, 0x2028 -}; -UCP_FN(Zl) - -static const unichar ucp_Zp_def[] = { - 0x2029, 0x2029 -}; -UCP_FN(Zp) - -static const unichar ucp_Zs_def[] = { - 0x20, 0x20, - 0xa0, 0xa0, - 0x1680, 0x1680, - 0x2000, 0x200a, - 0x202f, 0x202f, - 0x205f, 0x205f, - 0x3000, 0x3000 -}; -UCP_FN(Zs) - -static const unichar ucp_Arabic_def[] = { - 0x600, 0x604, - 0x606, 0x60b, - 0x60d, 0x61a, - 0x61e, 0x61e, - 0x620, 0x63f, - 0x641, 0x64a, - 0x656, 0x65f, - 0x66a, 0x66f, - 0x671, 0x6dc, - 0x6de, 0x6ff, - 0x750, 0x77f, - 0x8a0, 0x8b2, - 0x8e4, 0x8ff, - 0xfb50, 0xfbc1, - 0xfbd3, 0xfd3d, - 0xfd50, 0xfd8f, - 0xfd92, 0xfdc7, - 0xfdf0, 0xfdfd, - 0xfe70, 0xfe74, - 0xfe76, 0xfefc, - 0x10e60, 0x10e7e, - 0x1ee00, 0x1ee03, - 0x1ee05, 0x1ee1f, - 0x1ee21, 0x1ee22, - 0x1ee24, 0x1ee24, - 0x1ee27, 0x1ee27, - 0x1ee29, 0x1ee32, - 0x1ee34, 0x1ee37, - 0x1ee39, 0x1ee39, - 0x1ee3b, 0x1ee3b, - 0x1ee42, 0x1ee42, - 0x1ee47, 0x1ee47, - 0x1ee49, 0x1ee49, - 0x1ee4b, 0x1ee4b, - 0x1ee4d, 0x1ee4f, - 0x1ee51, 0x1ee52, - 0x1ee54, 0x1ee54, - 0x1ee57, 0x1ee57, - 0x1ee59, 0x1ee59, - 0x1ee5b, 0x1ee5b, - 0x1ee5d, 0x1ee5d, - 0x1ee5f, 0x1ee5f, - 0x1ee61, 0x1ee62, - 0x1ee64, 0x1ee64, - 0x1ee67, 0x1ee6a, - 0x1ee6c, 0x1ee72, - 0x1ee74, 0x1ee77, - 0x1ee79, 0x1ee7c, - 0x1ee7e, 0x1ee7e, - 0x1ee80, 0x1ee89, - 0x1ee8b, 0x1ee9b, - 0x1eea1, 0x1eea3, - 0x1eea5, 0x1eea9, - 0x1eeab, 0x1eebb, - 0x1eef0, 0x1eef1 -}; -UCP_FN(Arabic) - -static const unichar ucp_Armenian_def[] = { - 0x531, 0x556, - 0x559, 0x55f, - 0x561, 0x587, - 0x58a, 0x58a, - 0x58d, 0x58f, - 0xfb13, 0xfb17 -}; -UCP_FN(Armenian) - -static const unichar ucp_Avestan_def[] = { - 0x10b00, 0x10b35, - 0x10b39, 0x10b3f -}; -UCP_FN(Avestan) - -static const unichar ucp_Balinese_def[] = { - 0x1b00, 0x1b4b, - 0x1b50, 0x1b7c -}; -UCP_FN(Balinese) - -static const unichar ucp_Bamum_def[] = { - 0xa6a0, 0xa6f7, - 0x16800, 0x16a38 -}; -UCP_FN(Bamum) - -static const unichar ucp_Bassa_Vah_def[] = { - 0x16ad0, 0x16aed, - 0x16af0, 0x16af5 -}; -UCP_FN(Bassa_Vah) - -static const unichar ucp_Batak_def[] = { - 0x1bc0, 0x1bf3, - 0x1bfc, 0x1bff -}; -UCP_FN(Batak) - -static const unichar ucp_Bengali_def[] = { - 0x980, 0x983, - 0x985, 0x98c, - 0x98f, 0x990, - 0x993, 0x9a8, - 0x9aa, 0x9b0, - 0x9b2, 0x9b2, - 0x9b6, 0x9b9, - 0x9bc, 0x9c4, - 0x9c7, 0x9c8, - 0x9cb, 0x9ce, - 0x9d7, 0x9d7, - 0x9dc, 0x9dd, - 0x9df, 0x9e3, - 0x9e6, 0x9fb -}; -UCP_FN(Bengali) - -static const unichar ucp_Bopomofo_def[] = { - 0x2ea, 0x2eb, - 0x3105, 0x312d, - 0x31a0, 0x31ba -}; -UCP_FN(Bopomofo) - -static const unichar ucp_Brahmi_def[] = { - 0x11000, 0x1104d, - 0x11052, 0x1106f, - 0x1107f, 0x1107f -}; -UCP_FN(Brahmi) - -static const unichar ucp_Braille_def[] = { - 0x2800, 0x28ff -}; -UCP_FN(Braille) - -static const unichar ucp_Buginese_def[] = { - 0x1a00, 0x1a1b, - 0x1a1e, 0x1a1f -}; -UCP_FN(Buginese) - -static const unichar ucp_Buhid_def[] = { - 0x1740, 0x1753 -}; -UCP_FN(Buhid) - -static const unichar ucp_Canadian_Aboriginal_def[] = { - 0x1400, 0x167f, - 0x18b0, 0x18f5 -}; -UCP_FN(Canadian_Aboriginal) - -static const unichar ucp_Carian_def[] = { - 0x102a0, 0x102d0 -}; -UCP_FN(Carian) - -static const unichar ucp_Caucasian_Albanian_def[] = { - 0x10530, 0x10563, - 0x1056f, 0x1056f -}; -UCP_FN(Caucasian_Albanian) - -static const unichar ucp_Chakma_def[] = { - 0x11100, 0x11134, - 0x11136, 0x11143 -}; -UCP_FN(Chakma) - -static const unichar ucp_Cham_def[] = { - 0xaa00, 0xaa36, - 0xaa40, 0xaa4d, - 0xaa50, 0xaa59, - 0xaa5c, 0xaa5f -}; -UCP_FN(Cham) - -static const unichar ucp_Cherokee_def[] = { - 0x13a0, 0x13f4 -}; -UCP_FN(Cherokee) - -static const unichar ucp_Common_def[] = { - 0x0, 0x40, - 0x5b, 0x60, - 0x7b, 0xa9, - 0xab, 0xb9, - 0xbb, 0xbf, - 0xd7, 0xd7, - 0xf7, 0xf7, - 0x2b9, 0x2df, - 0x2e5, 0x2e9, - 0x2ec, 0x2ff, - 0x374, 0x374, - 0x378, 0x379, - 0x37e, 0x37e, - 0x380, 0x383, - 0x385, 0x385, - 0x387, 0x387, - 0x38b, 0x38b, - 0x38d, 0x38d, - 0x3a2, 0x3a2, - 0x530, 0x530, - 0x557, 0x558, - 0x560, 0x560, - 0x588, 0x589, - 0x58b, 0x58c, - 0x590, 0x590, - 0x5c8, 0x5cf, - 0x5eb, 0x5ef, - 0x5f5, 0x5ff, - 0x605, 0x605, - 0x60c, 0x60c, - 0x61b, 0x61d, - 0x61f, 0x61f, - 0x640, 0x640, - 0x660, 0x669, - 0x6dd, 0x6dd, - 0x70e, 0x70e, - 0x74b, 0x74c, - 0x7b2, 0x7bf, - 0x7fb, 0x7ff, - 0x82e, 0x82f, - 0x83f, 0x83f, - 0x85c, 0x85d, - 0x85f, 0x89f, - 0x8b3, 0x8e3, - 0x964, 0x965, - 0x984, 0x984, - 0x98d, 0x98e, - 0x991, 0x992, - 0x9a9, 0x9a9, - 0x9b1, 0x9b1, - 0x9b3, 0x9b5, - 0x9ba, 0x9bb, - 0x9c5, 0x9c6, - 0x9c9, 0x9ca, - 0x9cf, 0x9d6, - 0x9d8, 0x9db, - 0x9de, 0x9de, - 0x9e4, 0x9e5, - 0x9fc, 0xa00, - 0xa04, 0xa04, - 0xa0b, 0xa0e, - 0xa11, 0xa12, - 0xa29, 0xa29, - 0xa31, 0xa31, - 0xa34, 0xa34, - 0xa37, 0xa37, - 0xa3a, 0xa3b, - 0xa3d, 0xa3d, - 0xa43, 0xa46, - 0xa49, 0xa4a, - 0xa4e, 0xa50, - 0xa52, 0xa58, - 0xa5d, 0xa5d, - 0xa5f, 0xa65, - 0xa76, 0xa80, - 0xa84, 0xa84, - 0xa8e, 0xa8e, - 0xa92, 0xa92, - 0xaa9, 0xaa9, - 0xab1, 0xab1, - 0xab4, 0xab4, - 0xaba, 0xabb, - 0xac6, 0xac6, - 0xaca, 0xaca, - 0xace, 0xacf, - 0xad1, 0xadf, - 0xae4, 0xae5, - 0xaf2, 0xb00, - 0xb04, 0xb04, - 0xb0d, 0xb0e, - 0xb11, 0xb12, - 0xb29, 0xb29, - 0xb31, 0xb31, - 0xb34, 0xb34, - 0xb3a, 0xb3b, - 0xb45, 0xb46, - 0xb49, 0xb4a, - 0xb4e, 0xb55, - 0xb58, 0xb5b, - 0xb5e, 0xb5e, - 0xb64, 0xb65, - 0xb78, 0xb81, - 0xb84, 0xb84, - 0xb8b, 0xb8d, - 0xb91, 0xb91, - 0xb96, 0xb98, - 0xb9b, 0xb9b, - 0xb9d, 0xb9d, - 0xba0, 0xba2, - 0xba5, 0xba7, - 0xbab, 0xbad, - 0xbba, 0xbbd, - 0xbc3, 0xbc5, - 0xbc9, 0xbc9, - 0xbce, 0xbcf, - 0xbd1, 0xbd6, - 0xbd8, 0xbe5, - 0xbfb, 0xbff, - 0xc04, 0xc04, - 0xc0d, 0xc0d, - 0xc11, 0xc11, - 0xc29, 0xc29, - 0xc3a, 0xc3c, - 0xc45, 0xc45, - 0xc49, 0xc49, - 0xc4e, 0xc54, - 0xc57, 0xc57, - 0xc5a, 0xc5f, - 0xc64, 0xc65, - 0xc70, 0xc77, - 0xc80, 0xc80, - 0xc84, 0xc84, - 0xc8d, 0xc8d, - 0xc91, 0xc91, - 0xca9, 0xca9, - 0xcb4, 0xcb4, - 0xcba, 0xcbb, - 0xcc5, 0xcc5, - 0xcc9, 0xcc9, - 0xcce, 0xcd4, - 0xcd7, 0xcdd, - 0xcdf, 0xcdf, - 0xce4, 0xce5, - 0xcf0, 0xcf0, - 0xcf3, 0xd00, - 0xd04, 0xd04, - 0xd0d, 0xd0d, - 0xd11, 0xd11, - 0xd3b, 0xd3c, - 0xd45, 0xd45, - 0xd49, 0xd49, - 0xd4f, 0xd56, - 0xd58, 0xd5f, - 0xd64, 0xd65, - 0xd76, 0xd78, - 0xd80, 0xd81, - 0xd84, 0xd84, - 0xd97, 0xd99, - 0xdb2, 0xdb2, - 0xdbc, 0xdbc, - 0xdbe, 0xdbf, - 0xdc7, 0xdc9, - 0xdcb, 0xdce, - 0xdd5, 0xdd5, - 0xdd7, 0xdd7, - 0xde0, 0xde5, - 0xdf0, 0xdf1, - 0xdf5, 0xe00, - 0xe3b, 0xe3f, - 0xe5c, 0xe80, - 0xe83, 0xe83, - 0xe85, 0xe86, - 0xe89, 0xe89, - 0xe8b, 0xe8c, - 0xe8e, 0xe93, - 0xe98, 0xe98, - 0xea0, 0xea0, - 0xea4, 0xea4, - 0xea6, 0xea6, - 0xea8, 0xea9, - 0xeac, 0xeac, - 0xeba, 0xeba, - 0xebe, 0xebf, - 0xec5, 0xec5, - 0xec7, 0xec7, - 0xece, 0xecf, - 0xeda, 0xedb, - 0xee0, 0xeff, - 0xf48, 0xf48, - 0xf6d, 0xf70, - 0xf98, 0xf98, - 0xfbd, 0xfbd, - 0xfcd, 0xfcd, - 0xfd5, 0xfd8, - 0xfdb, 0xfff, - 0x10c6, 0x10c6, - 0x10c8, 0x10cc, - 0x10ce, 0x10cf, - 0x10fb, 0x10fb, - 0x1249, 0x1249, - 0x124e, 0x124f, - 0x1257, 0x1257, - 0x1259, 0x1259, - 0x125e, 0x125f, - 0x1289, 0x1289, - 0x128e, 0x128f, - 0x12b1, 0x12b1, - 0x12b6, 0x12b7, - 0x12bf, 0x12bf, - 0x12c1, 0x12c1, - 0x12c6, 0x12c7, - 0x12d7, 0x12d7, - 0x1311, 0x1311, - 0x1316, 0x1317, - 0x135b, 0x135c, - 0x137d, 0x137f, - 0x139a, 0x139f, - 0x13f5, 0x13ff, - 0x169d, 0x169f, - 0x16eb, 0x16ed, - 0x16f9, 0x16ff, - 0x170d, 0x170d, - 0x1715, 0x171f, - 0x1735, 0x173f, - 0x1754, 0x175f, - 0x176d, 0x176d, - 0x1771, 0x1771, - 0x1774, 0x177f, - 0x17de, 0x17df, - 0x17ea, 0x17ef, - 0x17fa, 0x17ff, - 0x1802, 0x1803, - 0x1805, 0x1805, - 0x180f, 0x180f, - 0x181a, 0x181f, - 0x1878, 0x187f, - 0x18ab, 0x18af, - 0x18f6, 0x18ff, - 0x191f, 0x191f, - 0x192c, 0x192f, - 0x193c, 0x193f, - 0x1941, 0x1943, - 0x196e, 0x196f, - 0x1975, 0x197f, - 0x19ac, 0x19af, - 0x19ca, 0x19cf, - 0x19db, 0x19dd, - 0x1a1c, 0x1a1d, - 0x1a5f, 0x1a5f, - 0x1a7d, 0x1a7e, - 0x1a8a, 0x1a8f, - 0x1a9a, 0x1a9f, - 0x1aae, 0x1aaf, - 0x1abf, 0x1aff, - 0x1b4c, 0x1b4f, - 0x1b7d, 0x1b7f, - 0x1bf4, 0x1bfb, - 0x1c38, 0x1c3a, - 0x1c4a, 0x1c4c, - 0x1c80, 0x1cbf, - 0x1cc8, 0x1ccf, - 0x1cd3, 0x1cd3, - 0x1ce1, 0x1ce1, - 0x1ce9, 0x1cec, - 0x1cee, 0x1cf3, - 0x1cf5, 0x1cf7, - 0x1cfa, 0x1cff, - 0x1df6, 0x1dfb, - 0x1f16, 0x1f17, - 0x1f1e, 0x1f1f, - 0x1f46, 0x1f47, - 0x1f4e, 0x1f4f, - 0x1f58, 0x1f58, - 0x1f5a, 0x1f5a, - 0x1f5c, 0x1f5c, - 0x1f5e, 0x1f5e, - 0x1f7e, 0x1f7f, - 0x1fb5, 0x1fb5, - 0x1fc5, 0x1fc5, - 0x1fd4, 0x1fd5, - 0x1fdc, 0x1fdc, - 0x1ff0, 0x1ff1, - 0x1ff5, 0x1ff5, - 0x1fff, 0x200b, - 0x200e, 0x2070, - 0x2072, 0x207e, - 0x2080, 0x208f, - 0x209d, 0x20cf, - 0x20f1, 0x2125, - 0x2127, 0x2129, - 0x212c, 0x2131, - 0x2133, 0x214d, - 0x214f, 0x215f, - 0x2189, 0x27ff, - 0x2900, 0x2bff, - 0x2c2f, 0x2c2f, - 0x2c5f, 0x2c5f, - 0x2cf4, 0x2cf8, - 0x2d26, 0x2d26, - 0x2d28, 0x2d2c, - 0x2d2e, 0x2d2f, - 0x2d68, 0x2d6e, - 0x2d71, 0x2d7e, - 0x2d97, 0x2d9f, - 0x2da7, 0x2da7, - 0x2daf, 0x2daf, - 0x2db7, 0x2db7, - 0x2dbf, 0x2dbf, - 0x2dc7, 0x2dc7, - 0x2dcf, 0x2dcf, - 0x2dd7, 0x2dd7, - 0x2ddf, 0x2ddf, - 0x2e00, 0x2e7f, - 0x2e9a, 0x2e9a, - 0x2ef4, 0x2eff, - 0x2fd6, 0x3004, - 0x3006, 0x3006, - 0x3008, 0x3020, - 0x3030, 0x3037, - 0x303c, 0x3040, - 0x3097, 0x3098, - 0x309b, 0x309c, - 0x30a0, 0x30a0, - 0x30fb, 0x30fc, - 0x3100, 0x3104, - 0x312e, 0x3130, - 0x318f, 0x319f, - 0x31bb, 0x31ef, - 0x321f, 0x325f, - 0x327f, 0x32cf, - 0x32ff, 0x32ff, - 0x3358, 0x33ff, - 0x4db6, 0x4dff, - 0x9fcd, 0x9fff, - 0xa48d, 0xa48f, - 0xa4c7, 0xa4cf, - 0xa62c, 0xa63f, - 0xa69e, 0xa69e, - 0xa6f8, 0xa721, - 0xa788, 0xa78a, - 0xa78f, 0xa78f, - 0xa7ae, 0xa7af, - 0xa7b2, 0xa7f6, - 0xa82c, 0xa83f, - 0xa878, 0xa87f, - 0xa8c5, 0xa8cd, - 0xa8da, 0xa8df, - 0xa8fc, 0xa8ff, - 0xa92e, 0xa92e, - 0xa954, 0xa95e, - 0xa97d, 0xa97f, - 0xa9ce, 0xa9cf, - 0xa9da, 0xa9dd, - 0xa9ff, 0xa9ff, - 0xaa37, 0xaa3f, - 0xaa4e, 0xaa4f, - 0xaa5a, 0xaa5b, - 0xaac3, 0xaada, - 0xaaf7, 0xab00, - 0xab07, 0xab08, - 0xab0f, 0xab10, - 0xab17, 0xab1f, - 0xab27, 0xab27, - 0xab2f, 0xab2f, - 0xab5b, 0xab5b, - 0xab60, 0xab63, - 0xab66, 0xabbf, - 0xabee, 0xabef, - 0xabfa, 0xabff, - 0xd7a4, 0xd7af, - 0xd7c7, 0xd7ca, - 0xd7fc, 0xf8ff, - 0xfa6e, 0xfa6f, - 0xfada, 0xfaff, - 0xfb07, 0xfb12, - 0xfb18, 0xfb1c, - 0xfb37, 0xfb37, - 0xfb3d, 0xfb3d, - 0xfb3f, 0xfb3f, - 0xfb42, 0xfb42, - 0xfb45, 0xfb45, - 0xfbc2, 0xfbd2, - 0xfd3e, 0xfd4f, - 0xfd90, 0xfd91, - 0xfdc8, 0xfdef, - 0xfdfe, 0xfdff, - 0xfe10, 0xfe1f, - 0xfe2e, 0xfe6f, - 0xfe75, 0xfe75, - 0xfefd, 0xff20, - 0xff3b, 0xff40, - 0xff5b, 0xff65, - 0xff70, 0xff70, - 0xff9e, 0xff9f, - 0xffbf, 0xffc1, - 0xffc8, 0xffc9, - 0xffd0, 0xffd1, - 0xffd8, 0xffd9, - 0xffdd, 0xffff, - 0x1000c, 0x1000c, - 0x10027, 0x10027, - 0x1003b, 0x1003b, - 0x1003e, 0x1003e, - 0x1004e, 0x1004f, - 0x1005e, 0x1007f, - 0x100fb, 0x1013f, - 0x1018d, 0x1019f, - 0x101a1, 0x101fc, - 0x101fe, 0x1027f, - 0x1029d, 0x1029f, - 0x102d1, 0x102df, - 0x102e1, 0x102ff, - 0x10324, 0x1032f, - 0x1034b, 0x1034f, - 0x1037b, 0x1037f, - 0x1039e, 0x1039e, - 0x103c4, 0x103c7, - 0x103d6, 0x103ff, - 0x1049e, 0x1049f, - 0x104aa, 0x104ff, - 0x10528, 0x1052f, - 0x10564, 0x1056e, - 0x10570, 0x105ff, - 0x10737, 0x1073f, - 0x10756, 0x1075f, - 0x10768, 0x107ff, - 0x10806, 0x10807, - 0x10809, 0x10809, - 0x10836, 0x10836, - 0x10839, 0x1083b, - 0x1083d, 0x1083e, - 0x10856, 0x10856, - 0x1089f, 0x108a6, - 0x108b0, 0x108ff, - 0x1091c, 0x1091e, - 0x1093a, 0x1093e, - 0x10940, 0x1097f, - 0x109b8, 0x109bd, - 0x109c0, 0x109ff, - 0x10a04, 0x10a04, - 0x10a07, 0x10a0b, - 0x10a14, 0x10a14, - 0x10a18, 0x10a18, - 0x10a34, 0x10a37, - 0x10a3b, 0x10a3e, - 0x10a48, 0x10a4f, - 0x10a59, 0x10a5f, - 0x10aa0, 0x10abf, - 0x10ae7, 0x10aea, - 0x10af7, 0x10aff, - 0x10b36, 0x10b38, - 0x10b56, 0x10b57, - 0x10b73, 0x10b77, - 0x10b92, 0x10b98, - 0x10b9d, 0x10ba8, - 0x10bb0, 0x10bff, - 0x10c49, 0x10e5f, - 0x10e7f, 0x10fff, - 0x1104e, 0x11051, - 0x11070, 0x1107e, - 0x110c2, 0x110cf, - 0x110e9, 0x110ef, - 0x110fa, 0x110ff, - 0x11135, 0x11135, - 0x11144, 0x1114f, - 0x11177, 0x1117f, - 0x111c9, 0x111cc, - 0x111ce, 0x111cf, - 0x111db, 0x111e0, - 0x111f5, 0x111ff, - 0x11212, 0x11212, - 0x1123e, 0x112af, - 0x112eb, 0x112ef, - 0x112fa, 0x11300, - 0x11304, 0x11304, - 0x1130d, 0x1130e, - 0x11311, 0x11312, - 0x11329, 0x11329, - 0x11331, 0x11331, - 0x11334, 0x11334, - 0x1133a, 0x1133b, - 0x11345, 0x11346, - 0x11349, 0x1134a, - 0x1134e, 0x11356, - 0x11358, 0x1135c, - 0x11364, 0x11365, - 0x1136d, 0x1136f, - 0x11375, 0x1147f, - 0x114c8, 0x114cf, - 0x114da, 0x1157f, - 0x115b6, 0x115b7, - 0x115ca, 0x115ff, - 0x11645, 0x1164f, - 0x1165a, 0x1167f, - 0x116b8, 0x116bf, - 0x116ca, 0x1189f, - 0x118f3, 0x118fe, - 0x11900, 0x11abf, - 0x11af9, 0x11fff, - 0x12399, 0x123ff, - 0x1246f, 0x1246f, - 0x12475, 0x12fff, - 0x1342f, 0x167ff, - 0x16a39, 0x16a3f, - 0x16a5f, 0x16a5f, - 0x16a6a, 0x16a6d, - 0x16a70, 0x16acf, - 0x16aee, 0x16aef, - 0x16af6, 0x16aff, - 0x16b46, 0x16b4f, - 0x16b5a, 0x16b5a, - 0x16b62, 0x16b62, - 0x16b78, 0x16b7c, - 0x16b90, 0x16eff, - 0x16f45, 0x16f4f, - 0x16f7f, 0x16f8e, - 0x16fa0, 0x1afff, - 0x1b002, 0x1bbff, - 0x1bc6b, 0x1bc6f, - 0x1bc7d, 0x1bc7f, - 0x1bc89, 0x1bc8f, - 0x1bc9a, 0x1bc9b, - 0x1bca0, 0x1d166, - 0x1d16a, 0x1d17a, - 0x1d183, 0x1d184, - 0x1d18c, 0x1d1a9, - 0x1d1ae, 0x1d1ff, - 0x1d246, 0x1e7ff, - 0x1e8c5, 0x1e8c6, - 0x1e8d7, 0x1edff, - 0x1ee04, 0x1ee04, - 0x1ee20, 0x1ee20, - 0x1ee23, 0x1ee23, - 0x1ee25, 0x1ee26, - 0x1ee28, 0x1ee28, - 0x1ee33, 0x1ee33, - 0x1ee38, 0x1ee38, - 0x1ee3a, 0x1ee3a, - 0x1ee3c, 0x1ee41, - 0x1ee43, 0x1ee46, - 0x1ee48, 0x1ee48, - 0x1ee4a, 0x1ee4a, - 0x1ee4c, 0x1ee4c, - 0x1ee50, 0x1ee50, - 0x1ee53, 0x1ee53, - 0x1ee55, 0x1ee56, - 0x1ee58, 0x1ee58, - 0x1ee5a, 0x1ee5a, - 0x1ee5c, 0x1ee5c, - 0x1ee5e, 0x1ee5e, - 0x1ee60, 0x1ee60, - 0x1ee63, 0x1ee63, - 0x1ee65, 0x1ee66, - 0x1ee6b, 0x1ee6b, - 0x1ee73, 0x1ee73, - 0x1ee78, 0x1ee78, - 0x1ee7d, 0x1ee7d, - 0x1ee7f, 0x1ee7f, - 0x1ee8a, 0x1ee8a, - 0x1ee9c, 0x1eea0, - 0x1eea4, 0x1eea4, - 0x1eeaa, 0x1eeaa, - 0x1eebc, 0x1eeef, - 0x1eef2, 0x1f1ff, - 0x1f201, 0x1ffff, - 0x2a6d7, 0x2a6ff, - 0x2b735, 0x2b73f, - 0x2b81e, 0x2f7ff, - 0x2fa1e, 0xe00ff, - 0xe01f0, 0x10ffff -}; -UCP_FN(Common) - -static const unichar ucp_Coptic_def[] = { - 0x3e2, 0x3ef, - 0x2c80, 0x2cf3, - 0x2cf9, 0x2cff -}; -UCP_FN(Coptic) - -static const unichar ucp_Cuneiform_def[] = { - 0x12000, 0x12398, - 0x12400, 0x1246e, - 0x12470, 0x12474 -}; -UCP_FN(Cuneiform) - -static const unichar ucp_Cypriot_def[] = { - 0x10800, 0x10805, - 0x10808, 0x10808, - 0x1080a, 0x10835, - 0x10837, 0x10838, - 0x1083c, 0x1083c, - 0x1083f, 0x1083f -}; -UCP_FN(Cypriot) - -static const unichar ucp_Cyrillic_def[] = { - 0x400, 0x484, - 0x487, 0x52f, - 0x1d2b, 0x1d2b, - 0x1d78, 0x1d78, - 0x2de0, 0x2dff, - 0xa640, 0xa69d, - 0xa69f, 0xa69f -}; -UCP_FN(Cyrillic) - -static const unichar ucp_Deseret_def[] = { - 0x10400, 0x1044f -}; -UCP_FN(Deseret) - -static const unichar ucp_Devanagari_def[] = { - 0x900, 0x950, - 0x953, 0x963, - 0x966, 0x97f, - 0xa8e0, 0xa8fb -}; -UCP_FN(Devanagari) - -static const unichar ucp_Duployan_def[] = { - 0x1bc00, 0x1bc6a, - 0x1bc70, 0x1bc7c, - 0x1bc80, 0x1bc88, - 0x1bc90, 0x1bc99, - 0x1bc9c, 0x1bc9f -}; -UCP_FN(Duployan) - -static const unichar ucp_Egyptian_Hieroglyphs_def[] = { - 0x13000, 0x1342e -}; -UCP_FN(Egyptian_Hieroglyphs) - -static const unichar ucp_Elbasan_def[] = { - 0x10500, 0x10527 -}; -UCP_FN(Elbasan) - -static const unichar ucp_Ethiopic_def[] = { - 0x1200, 0x1248, - 0x124a, 0x124d, - 0x1250, 0x1256, - 0x1258, 0x1258, - 0x125a, 0x125d, - 0x1260, 0x1288, - 0x128a, 0x128d, - 0x1290, 0x12b0, - 0x12b2, 0x12b5, - 0x12b8, 0x12be, - 0x12c0, 0x12c0, - 0x12c2, 0x12c5, - 0x12c8, 0x12d6, - 0x12d8, 0x1310, - 0x1312, 0x1315, - 0x1318, 0x135a, - 0x135d, 0x137c, - 0x1380, 0x1399, - 0x2d80, 0x2d96, - 0x2da0, 0x2da6, - 0x2da8, 0x2dae, - 0x2db0, 0x2db6, - 0x2db8, 0x2dbe, - 0x2dc0, 0x2dc6, - 0x2dc8, 0x2dce, - 0x2dd0, 0x2dd6, - 0x2dd8, 0x2dde, - 0xab01, 0xab06, - 0xab09, 0xab0e, - 0xab11, 0xab16, - 0xab20, 0xab26, - 0xab28, 0xab2e -}; -UCP_FN(Ethiopic) - -static const unichar ucp_Georgian_def[] = { - 0x10a0, 0x10c5, - 0x10c7, 0x10c7, - 0x10cd, 0x10cd, - 0x10d0, 0x10fa, - 0x10fc, 0x10ff, - 0x2d00, 0x2d25, - 0x2d27, 0x2d27, - 0x2d2d, 0x2d2d -}; -UCP_FN(Georgian) - -static const unichar ucp_Glagolitic_def[] = { - 0x2c00, 0x2c2e, - 0x2c30, 0x2c5e -}; -UCP_FN(Glagolitic) - -static const unichar ucp_Gothic_def[] = { - 0x10330, 0x1034a -}; -UCP_FN(Gothic) - -static const unichar ucp_Grantha_def[] = { - 0x11301, 0x11303, - 0x11305, 0x1130c, - 0x1130f, 0x11310, - 0x11313, 0x11328, - 0x1132a, 0x11330, - 0x11332, 0x11333, - 0x11335, 0x11339, - 0x1133c, 0x11344, - 0x11347, 0x11348, - 0x1134b, 0x1134d, - 0x11357, 0x11357, - 0x1135d, 0x11363, - 0x11366, 0x1136c, - 0x11370, 0x11374 -}; -UCP_FN(Grantha) - -static const unichar ucp_Greek_def[] = { - 0x370, 0x373, - 0x375, 0x377, - 0x37a, 0x37d, - 0x37f, 0x37f, - 0x384, 0x384, - 0x386, 0x386, - 0x388, 0x38a, - 0x38c, 0x38c, - 0x38e, 0x3a1, - 0x3a3, 0x3e1, - 0x3f0, 0x3ff, - 0x1d26, 0x1d2a, - 0x1d5d, 0x1d61, - 0x1d66, 0x1d6a, - 0x1dbf, 0x1dbf, - 0x1f00, 0x1f15, - 0x1f18, 0x1f1d, - 0x1f20, 0x1f45, - 0x1f48, 0x1f4d, - 0x1f50, 0x1f57, - 0x1f59, 0x1f59, - 0x1f5b, 0x1f5b, - 0x1f5d, 0x1f5d, - 0x1f5f, 0x1f7d, - 0x1f80, 0x1fb4, - 0x1fb6, 0x1fc4, - 0x1fc6, 0x1fd3, - 0x1fd6, 0x1fdb, - 0x1fdd, 0x1fef, - 0x1ff2, 0x1ff4, - 0x1ff6, 0x1ffe, - 0x2126, 0x2126, - 0xab65, 0xab65, - 0x10140, 0x1018c, - 0x101a0, 0x101a0, - 0x1d200, 0x1d245 -}; -UCP_FN(Greek) - -static const unichar ucp_Gujarati_def[] = { - 0xa81, 0xa83, - 0xa85, 0xa8d, - 0xa8f, 0xa91, - 0xa93, 0xaa8, - 0xaaa, 0xab0, - 0xab2, 0xab3, - 0xab5, 0xab9, - 0xabc, 0xac5, - 0xac7, 0xac9, - 0xacb, 0xacd, - 0xad0, 0xad0, - 0xae0, 0xae3, - 0xae6, 0xaf1 -}; -UCP_FN(Gujarati) - -static const unichar ucp_Gurmukhi_def[] = { - 0xa01, 0xa03, - 0xa05, 0xa0a, - 0xa0f, 0xa10, - 0xa13, 0xa28, - 0xa2a, 0xa30, - 0xa32, 0xa33, - 0xa35, 0xa36, - 0xa38, 0xa39, - 0xa3c, 0xa3c, - 0xa3e, 0xa42, - 0xa47, 0xa48, - 0xa4b, 0xa4d, - 0xa51, 0xa51, - 0xa59, 0xa5c, - 0xa5e, 0xa5e, - 0xa66, 0xa75 -}; -UCP_FN(Gurmukhi) - -static const unichar ucp_Han_def[] = { - 0x2e80, 0x2e99, - 0x2e9b, 0x2ef3, - 0x2f00, 0x2fd5, - 0x3005, 0x3005, - 0x3007, 0x3007, - 0x3021, 0x3029, - 0x3038, 0x303b, - 0x3400, 0x4db5, - 0x4e00, 0x9fcc, - 0xf900, 0xfa6d, - 0xfa70, 0xfad9, - 0x20000, 0x2a6d6, - 0x2a700, 0x2b734, - 0x2b740, 0x2b81d, - 0x2f800, 0x2fa1d -}; -UCP_FN(Han) - -static const unichar ucp_Hangul_def[] = { - 0x1100, 0x11ff, - 0x302e, 0x302f, - 0x3131, 0x318e, - 0x3200, 0x321e, - 0x3260, 0x327e, - 0xa960, 0xa97c, - 0xac00, 0xd7a3, - 0xd7b0, 0xd7c6, - 0xd7cb, 0xd7fb, - 0xffa0, 0xffbe, - 0xffc2, 0xffc7, - 0xffca, 0xffcf, - 0xffd2, 0xffd7, - 0xffda, 0xffdc -}; -UCP_FN(Hangul) - -static const unichar ucp_Hanunoo_def[] = { - 0x1720, 0x1734 -}; -UCP_FN(Hanunoo) - -static const unichar ucp_Hebrew_def[] = { - 0x591, 0x5c7, - 0x5d0, 0x5ea, - 0x5f0, 0x5f4, - 0xfb1d, 0xfb36, - 0xfb38, 0xfb3c, - 0xfb3e, 0xfb3e, - 0xfb40, 0xfb41, - 0xfb43, 0xfb44, - 0xfb46, 0xfb4f -}; -UCP_FN(Hebrew) - -static const unichar ucp_Hiragana_def[] = { - 0x3041, 0x3096, - 0x309d, 0x309f, - 0x1b001, 0x1b001, - 0x1f200, 0x1f200 -}; -UCP_FN(Hiragana) - -static const unichar ucp_Imperial_Aramaic_def[] = { - 0x10840, 0x10855, - 0x10857, 0x1085f -}; -UCP_FN(Imperial_Aramaic) - -static const unichar ucp_Inherited_def[] = { - 0x300, 0x36f, - 0x485, 0x486, - 0x64b, 0x655, - 0x670, 0x670, - 0x951, 0x952, - 0x1ab0, 0x1abe, - 0x1cd0, 0x1cd2, - 0x1cd4, 0x1ce0, - 0x1ce2, 0x1ce8, - 0x1ced, 0x1ced, - 0x1cf4, 0x1cf4, - 0x1cf8, 0x1cf9, - 0x1dc0, 0x1df5, - 0x1dfc, 0x1dff, - 0x200c, 0x200d, - 0x20d0, 0x20f0, - 0x302a, 0x302d, - 0x3099, 0x309a, - 0xfe00, 0xfe0f, - 0xfe20, 0xfe2d, - 0x101fd, 0x101fd, - 0x102e0, 0x102e0, - 0x1d167, 0x1d169, - 0x1d17b, 0x1d182, - 0x1d185, 0x1d18b, - 0x1d1aa, 0x1d1ad, - 0xe0100, 0xe01ef -}; -UCP_FN(Inherited) - -static const unichar ucp_Inscriptional_Pahlavi_def[] = { - 0x10b60, 0x10b72, - 0x10b78, 0x10b7f -}; -UCP_FN(Inscriptional_Pahlavi) - -static const unichar ucp_Inscriptional_Parthian_def[] = { - 0x10b40, 0x10b55, - 0x10b58, 0x10b5f -}; -UCP_FN(Inscriptional_Parthian) - -static const unichar ucp_Javanese_def[] = { - 0xa980, 0xa9cd, - 0xa9d0, 0xa9d9, - 0xa9de, 0xa9df -}; -UCP_FN(Javanese) - -static const unichar ucp_Kaithi_def[] = { - 0x11080, 0x110c1 -}; -UCP_FN(Kaithi) - -static const unichar ucp_Kannada_def[] = { - 0xc81, 0xc83, - 0xc85, 0xc8c, - 0xc8e, 0xc90, - 0xc92, 0xca8, - 0xcaa, 0xcb3, - 0xcb5, 0xcb9, - 0xcbc, 0xcc4, - 0xcc6, 0xcc8, - 0xcca, 0xccd, - 0xcd5, 0xcd6, - 0xcde, 0xcde, - 0xce0, 0xce3, - 0xce6, 0xcef, - 0xcf1, 0xcf2 -}; -UCP_FN(Kannada) - -static const unichar ucp_Katakana_def[] = { - 0x30a1, 0x30fa, - 0x30fd, 0x30ff, - 0x31f0, 0x31ff, - 0x32d0, 0x32fe, - 0x3300, 0x3357, - 0xff66, 0xff6f, - 0xff71, 0xff9d, - 0x1b000, 0x1b000 -}; -UCP_FN(Katakana) - -static const unichar ucp_Kayah_Li_def[] = { - 0xa900, 0xa92d, - 0xa92f, 0xa92f -}; -UCP_FN(Kayah_Li) - -static const unichar ucp_Kharoshthi_def[] = { - 0x10a00, 0x10a03, - 0x10a05, 0x10a06, - 0x10a0c, 0x10a13, - 0x10a15, 0x10a17, - 0x10a19, 0x10a33, - 0x10a38, 0x10a3a, - 0x10a3f, 0x10a47, - 0x10a50, 0x10a58 -}; -UCP_FN(Kharoshthi) - -static const unichar ucp_Khmer_def[] = { - 0x1780, 0x17dd, - 0x17e0, 0x17e9, - 0x17f0, 0x17f9, - 0x19e0, 0x19ff -}; -UCP_FN(Khmer) - -static const unichar ucp_Khojki_def[] = { - 0x11200, 0x11211, - 0x11213, 0x1123d -}; -UCP_FN(Khojki) - -static const unichar ucp_Khudawadi_def[] = { - 0x112b0, 0x112ea, - 0x112f0, 0x112f9 -}; -UCP_FN(Khudawadi) - -static const unichar ucp_Lao_def[] = { - 0xe81, 0xe82, - 0xe84, 0xe84, - 0xe87, 0xe88, - 0xe8a, 0xe8a, - 0xe8d, 0xe8d, - 0xe94, 0xe97, - 0xe99, 0xe9f, - 0xea1, 0xea3, - 0xea5, 0xea5, - 0xea7, 0xea7, - 0xeaa, 0xeab, - 0xead, 0xeb9, - 0xebb, 0xebd, - 0xec0, 0xec4, - 0xec6, 0xec6, - 0xec8, 0xecd, - 0xed0, 0xed9, - 0xedc, 0xedf -}; -UCP_FN(Lao) - -static const unichar ucp_Latin_def[] = { - 0x41, 0x5a, - 0x61, 0x7a, - 0xaa, 0xaa, - 0xba, 0xba, - 0xc0, 0xd6, - 0xd8, 0xf6, - 0xf8, 0x2b8, - 0x2e0, 0x2e4, - 0x1d00, 0x1d25, - 0x1d2c, 0x1d5c, - 0x1d62, 0x1d65, - 0x1d6b, 0x1d77, - 0x1d79, 0x1dbe, - 0x1e00, 0x1eff, - 0x2071, 0x2071, - 0x207f, 0x207f, - 0x2090, 0x209c, - 0x212a, 0x212b, - 0x2132, 0x2132, - 0x214e, 0x214e, - 0x2160, 0x2188, - 0x2c60, 0x2c7f, - 0xa722, 0xa787, - 0xa78b, 0xa78e, - 0xa790, 0xa7ad, - 0xa7b0, 0xa7b1, - 0xa7f7, 0xa7ff, - 0xab30, 0xab5a, - 0xab5c, 0xab5f, - 0xab64, 0xab64, - 0xfb00, 0xfb06, - 0xff21, 0xff3a, - 0xff41, 0xff5a -}; -UCP_FN(Latin) - -static const unichar ucp_Lepcha_def[] = { - 0x1c00, 0x1c37, - 0x1c3b, 0x1c49, - 0x1c4d, 0x1c4f -}; -UCP_FN(Lepcha) - -static const unichar ucp_Limbu_def[] = { - 0x1900, 0x191e, - 0x1920, 0x192b, - 0x1930, 0x193b, - 0x1940, 0x1940, - 0x1944, 0x194f -}; -UCP_FN(Limbu) - -static const unichar ucp_Linear_A_def[] = { - 0x10600, 0x10736, - 0x10740, 0x10755, - 0x10760, 0x10767 -}; -UCP_FN(Linear_A) - -static const unichar ucp_Linear_B_def[] = { - 0x10000, 0x1000b, - 0x1000d, 0x10026, - 0x10028, 0x1003a, - 0x1003c, 0x1003d, - 0x1003f, 0x1004d, - 0x10050, 0x1005d, - 0x10080, 0x100fa -}; -UCP_FN(Linear_B) - -static const unichar ucp_Lisu_def[] = { - 0xa4d0, 0xa4ff -}; -UCP_FN(Lisu) - -static const unichar ucp_Lycian_def[] = { - 0x10280, 0x1029c -}; -UCP_FN(Lycian) - -static const unichar ucp_Lydian_def[] = { - 0x10920, 0x10939, - 0x1093f, 0x1093f -}; -UCP_FN(Lydian) - -static const unichar ucp_Mahajani_def[] = { - 0x11150, 0x11176 -}; -UCP_FN(Mahajani) - -static const unichar ucp_Malayalam_def[] = { - 0xd01, 0xd03, - 0xd05, 0xd0c, - 0xd0e, 0xd10, - 0xd12, 0xd3a, - 0xd3d, 0xd44, - 0xd46, 0xd48, - 0xd4a, 0xd4e, - 0xd57, 0xd57, - 0xd60, 0xd63, - 0xd66, 0xd75, - 0xd79, 0xd7f -}; -UCP_FN(Malayalam) - -static const unichar ucp_Mandaic_def[] = { - 0x840, 0x85b, - 0x85e, 0x85e -}; -UCP_FN(Mandaic) - -static const unichar ucp_Manichaean_def[] = { - 0x10ac0, 0x10ae6, - 0x10aeb, 0x10af6 -}; -UCP_FN(Manichaean) - -static const unichar ucp_Meetei_Mayek_def[] = { - 0xaae0, 0xaaf6, - 0xabc0, 0xabed, - 0xabf0, 0xabf9 -}; -UCP_FN(Meetei_Mayek) - -static const unichar ucp_Mende_Kikakui_def[] = { - 0x1e800, 0x1e8c4, - 0x1e8c7, 0x1e8d6 -}; -UCP_FN(Mende_Kikakui) - -static const unichar ucp_Meroitic_Cursive_def[] = { - 0x109a0, 0x109b7, - 0x109be, 0x109bf -}; -UCP_FN(Meroitic_Cursive) - -static const unichar ucp_Meroitic_Hieroglyphs_def[] = { - 0x10980, 0x1099f -}; -UCP_FN(Meroitic_Hieroglyphs) - -static const unichar ucp_Miao_def[] = { - 0x16f00, 0x16f44, - 0x16f50, 0x16f7e, - 0x16f8f, 0x16f9f -}; -UCP_FN(Miao) - -static const unichar ucp_Modi_def[] = { - 0x11600, 0x11644, - 0x11650, 0x11659 -}; -UCP_FN(Modi) - -static const unichar ucp_Mongolian_def[] = { - 0x1800, 0x1801, - 0x1804, 0x1804, - 0x1806, 0x180e, - 0x1810, 0x1819, - 0x1820, 0x1877, - 0x1880, 0x18aa -}; -UCP_FN(Mongolian) - -static const unichar ucp_Mro_def[] = { - 0x16a40, 0x16a5e, - 0x16a60, 0x16a69, - 0x16a6e, 0x16a6f -}; -UCP_FN(Mro) - -static const unichar ucp_Myanmar_def[] = { - 0x1000, 0x109f, - 0xa9e0, 0xa9fe, - 0xaa60, 0xaa7f -}; -UCP_FN(Myanmar) - -static const unichar ucp_Nabataean_def[] = { - 0x10880, 0x1089e, - 0x108a7, 0x108af -}; -UCP_FN(Nabataean) - -static const unichar ucp_New_Tai_Lue_def[] = { - 0x1980, 0x19ab, - 0x19b0, 0x19c9, - 0x19d0, 0x19da, - 0x19de, 0x19df -}; -UCP_FN(New_Tai_Lue) - -static const unichar ucp_Nko_def[] = { - 0x7c0, 0x7fa -}; -UCP_FN(Nko) - -static const unichar ucp_Ogham_def[] = { - 0x1680, 0x169c -}; -UCP_FN(Ogham) - -static const unichar ucp_Ol_Chiki_def[] = { - 0x1c50, 0x1c7f -}; -UCP_FN(Ol_Chiki) - -static const unichar ucp_Old_Italic_def[] = { - 0x10300, 0x10323 -}; -UCP_FN(Old_Italic) - -static const unichar ucp_Old_North_Arabian_def[] = { - 0x10a80, 0x10a9f -}; -UCP_FN(Old_North_Arabian) - -static const unichar ucp_Old_Permic_def[] = { - 0x10350, 0x1037a -}; -UCP_FN(Old_Permic) - -static const unichar ucp_Old_Persian_def[] = { - 0x103a0, 0x103c3, - 0x103c8, 0x103d5 -}; -UCP_FN(Old_Persian) - -static const unichar ucp_Old_South_Arabian_def[] = { - 0x10a60, 0x10a7f -}; -UCP_FN(Old_South_Arabian) - -static const unichar ucp_Old_Turkic_def[] = { - 0x10c00, 0x10c48 -}; -UCP_FN(Old_Turkic) - -static const unichar ucp_Oriya_def[] = { - 0xb01, 0xb03, - 0xb05, 0xb0c, - 0xb0f, 0xb10, - 0xb13, 0xb28, - 0xb2a, 0xb30, - 0xb32, 0xb33, - 0xb35, 0xb39, - 0xb3c, 0xb44, - 0xb47, 0xb48, - 0xb4b, 0xb4d, - 0xb56, 0xb57, - 0xb5c, 0xb5d, - 0xb5f, 0xb63, - 0xb66, 0xb77 -}; -UCP_FN(Oriya) - -static const unichar ucp_Osmanya_def[] = { - 0x10480, 0x1049d, - 0x104a0, 0x104a9 -}; -UCP_FN(Osmanya) - -static const unichar ucp_Pahawh_Hmong_def[] = { - 0x16b00, 0x16b45, - 0x16b50, 0x16b59, - 0x16b5b, 0x16b61, - 0x16b63, 0x16b77, - 0x16b7d, 0x16b8f -}; -UCP_FN(Pahawh_Hmong) - -static const unichar ucp_Palmyrene_def[] = { - 0x10860, 0x1087f -}; -UCP_FN(Palmyrene) - -static const unichar ucp_Pau_Cin_Hau_def[] = { - 0x11ac0, 0x11af8 -}; -UCP_FN(Pau_Cin_Hau) - -static const unichar ucp_Phags_Pa_def[] = { - 0xa840, 0xa877 -}; -UCP_FN(Phags_Pa) - -static const unichar ucp_Phoenician_def[] = { - 0x10900, 0x1091b, - 0x1091f, 0x1091f -}; -UCP_FN(Phoenician) - -static const unichar ucp_Psalter_Pahlavi_def[] = { - 0x10b80, 0x10b91, - 0x10b99, 0x10b9c, - 0x10ba9, 0x10baf -}; -UCP_FN(Psalter_Pahlavi) - -static const unichar ucp_Rejang_def[] = { - 0xa930, 0xa953, - 0xa95f, 0xa95f -}; -UCP_FN(Rejang) - -static const unichar ucp_Runic_def[] = { - 0x16a0, 0x16ea, - 0x16ee, 0x16f8 -}; -UCP_FN(Runic) - -static const unichar ucp_Samaritan_def[] = { - 0x800, 0x82d, - 0x830, 0x83e -}; -UCP_FN(Samaritan) - -static const unichar ucp_Saurashtra_def[] = { - 0xa880, 0xa8c4, - 0xa8ce, 0xa8d9 -}; -UCP_FN(Saurashtra) - -static const unichar ucp_Sharada_def[] = { - 0x11180, 0x111c8, - 0x111cd, 0x111cd, - 0x111d0, 0x111da -}; -UCP_FN(Sharada) - -static const unichar ucp_Shavian_def[] = { - 0x10450, 0x1047f -}; -UCP_FN(Shavian) - -static const unichar ucp_Siddham_def[] = { - 0x11580, 0x115b5, - 0x115b8, 0x115c9 -}; -UCP_FN(Siddham) - -static const unichar ucp_Sinhala_def[] = { - 0xd82, 0xd83, - 0xd85, 0xd96, - 0xd9a, 0xdb1, - 0xdb3, 0xdbb, - 0xdbd, 0xdbd, - 0xdc0, 0xdc6, - 0xdca, 0xdca, - 0xdcf, 0xdd4, - 0xdd6, 0xdd6, - 0xdd8, 0xddf, - 0xde6, 0xdef, - 0xdf2, 0xdf4, - 0x111e1, 0x111f4 -}; -UCP_FN(Sinhala) - -static const unichar ucp_Sora_Sompeng_def[] = { - 0x110d0, 0x110e8, - 0x110f0, 0x110f9 -}; -UCP_FN(Sora_Sompeng) - -static const unichar ucp_Sundanese_def[] = { - 0x1b80, 0x1bbf, - 0x1cc0, 0x1cc7 -}; -UCP_FN(Sundanese) - -static const unichar ucp_Syloti_Nagri_def[] = { - 0xa800, 0xa82b -}; -UCP_FN(Syloti_Nagri) - -static const unichar ucp_Syriac_def[] = { - 0x700, 0x70d, - 0x70f, 0x74a, - 0x74d, 0x74f -}; -UCP_FN(Syriac) - -static const unichar ucp_Tagalog_def[] = { - 0x1700, 0x170c, - 0x170e, 0x1714 -}; -UCP_FN(Tagalog) - -static const unichar ucp_Tagbanwa_def[] = { - 0x1760, 0x176c, - 0x176e, 0x1770, - 0x1772, 0x1773 -}; -UCP_FN(Tagbanwa) - -static const unichar ucp_Tai_Le_def[] = { - 0x1950, 0x196d, - 0x1970, 0x1974 -}; -UCP_FN(Tai_Le) - -static const unichar ucp_Tai_Tham_def[] = { - 0x1a20, 0x1a5e, - 0x1a60, 0x1a7c, - 0x1a7f, 0x1a89, - 0x1a90, 0x1a99, - 0x1aa0, 0x1aad -}; -UCP_FN(Tai_Tham) - -static const unichar ucp_Tai_Viet_def[] = { - 0xaa80, 0xaac2, - 0xaadb, 0xaadf -}; -UCP_FN(Tai_Viet) - -static const unichar ucp_Takri_def[] = { - 0x11680, 0x116b7, - 0x116c0, 0x116c9 -}; -UCP_FN(Takri) - -static const unichar ucp_Tamil_def[] = { - 0xb82, 0xb83, - 0xb85, 0xb8a, - 0xb8e, 0xb90, - 0xb92, 0xb95, - 0xb99, 0xb9a, - 0xb9c, 0xb9c, - 0xb9e, 0xb9f, - 0xba3, 0xba4, - 0xba8, 0xbaa, - 0xbae, 0xbb9, - 0xbbe, 0xbc2, - 0xbc6, 0xbc8, - 0xbca, 0xbcd, - 0xbd0, 0xbd0, - 0xbd7, 0xbd7, - 0xbe6, 0xbfa -}; -UCP_FN(Tamil) - -static const unichar ucp_Telugu_def[] = { - 0xc00, 0xc03, - 0xc05, 0xc0c, - 0xc0e, 0xc10, - 0xc12, 0xc28, - 0xc2a, 0xc39, - 0xc3d, 0xc44, - 0xc46, 0xc48, - 0xc4a, 0xc4d, - 0xc55, 0xc56, - 0xc58, 0xc59, - 0xc60, 0xc63, - 0xc66, 0xc6f, - 0xc78, 0xc7f -}; -UCP_FN(Telugu) - -static const unichar ucp_Thaana_def[] = { - 0x780, 0x7b1 -}; -UCP_FN(Thaana) - -static const unichar ucp_Thai_def[] = { - 0xe01, 0xe3a, - 0xe40, 0xe5b -}; -UCP_FN(Thai) - -static const unichar ucp_Tibetan_def[] = { - 0xf00, 0xf47, - 0xf49, 0xf6c, - 0xf71, 0xf97, - 0xf99, 0xfbc, - 0xfbe, 0xfcc, - 0xfce, 0xfd4, - 0xfd9, 0xfda -}; -UCP_FN(Tibetan) - -static const unichar ucp_Tifinagh_def[] = { - 0x2d30, 0x2d67, - 0x2d6f, 0x2d70, - 0x2d7f, 0x2d7f -}; -UCP_FN(Tifinagh) - -static const unichar ucp_Tirhuta_def[] = { - 0x11480, 0x114c7, - 0x114d0, 0x114d9 -}; -UCP_FN(Tirhuta) - -static const unichar ucp_Ugaritic_def[] = { - 0x10380, 0x1039d, - 0x1039f, 0x1039f -}; -UCP_FN(Ugaritic) - -static const unichar ucp_Vai_def[] = { - 0xa500, 0xa62b -}; -UCP_FN(Vai) - -static const unichar ucp_Warang_Citi_def[] = { - 0x118a0, 0x118f2, - 0x118ff, 0x118ff -}; -UCP_FN(Warang_Citi) - -static const unichar ucp_Yi_def[] = { - 0xa000, 0xa48c, - 0xa490, 0xa4c6 -}; -UCP_FN(Yi) - -static const unicase ucp_caseless_def[] = { - {0x41, 0x61}, - {0x42, 0x62}, - {0x43, 0x63}, - {0x44, 0x64}, - {0x45, 0x65}, - {0x46, 0x66}, - {0x47, 0x67}, - {0x48, 0x68}, - {0x49, 0x69}, - {0x4a, 0x6a}, - {0x4b, 0x6b}, - {0x4b, 0x212a}, - {0x4c, 0x6c}, - {0x4d, 0x6d}, - {0x4e, 0x6e}, - {0x4f, 0x6f}, - {0x50, 0x70}, - {0x51, 0x71}, - {0x52, 0x72}, - {0x53, 0x73}, - {0x53, 0x17f}, - {0x54, 0x74}, - {0x55, 0x75}, - {0x56, 0x76}, - {0x57, 0x77}, - {0x58, 0x78}, - {0x59, 0x79}, - {0x5a, 0x7a}, - {0x61, 0x41}, - {0x62, 0x42}, - {0x63, 0x43}, - {0x64, 0x44}, - {0x65, 0x45}, - {0x66, 0x46}, - {0x67, 0x47}, - {0x68, 0x48}, - {0x69, 0x49}, - {0x6a, 0x4a}, - {0x6b, 0x4b}, - {0x6b, 0x212a}, - {0x6c, 0x4c}, - {0x6d, 0x4d}, - {0x6e, 0x4e}, - {0x6f, 0x4f}, - {0x70, 0x50}, - {0x71, 0x51}, - {0x72, 0x52}, - {0x73, 0x53}, - {0x73, 0x17f}, - {0x74, 0x54}, - {0x75, 0x55}, - {0x76, 0x56}, - {0x77, 0x57}, - {0x78, 0x58}, - {0x79, 0x59}, - {0x7a, 0x5a}, - {0xb5, 0x39c}, - {0xb5, 0x3bc}, - {0xc0, 0xe0}, - {0xc1, 0xe1}, - {0xc2, 0xe2}, - {0xc3, 0xe3}, - {0xc4, 0xe4}, - {0xc5, 0xe5}, - {0xc5, 0x212b}, - {0xc6, 0xe6}, - {0xc7, 0xe7}, - {0xc8, 0xe8}, - {0xc9, 0xe9}, - {0xca, 0xea}, - {0xcb, 0xeb}, - {0xcc, 0xec}, - {0xcd, 0xed}, - {0xce, 0xee}, - {0xcf, 0xef}, - {0xd0, 0xf0}, - {0xd1, 0xf1}, - {0xd2, 0xf2}, - {0xd3, 0xf3}, - {0xd4, 0xf4}, - {0xd5, 0xf5}, - {0xd6, 0xf6}, - {0xd8, 0xf8}, - {0xd9, 0xf9}, - {0xda, 0xfa}, - {0xdb, 0xfb}, - {0xdc, 0xfc}, - {0xdd, 0xfd}, - {0xde, 0xfe}, - {0xdf, 0x1e9e}, - {0xe0, 0xc0}, - {0xe1, 0xc1}, - {0xe2, 0xc2}, - {0xe3, 0xc3}, - {0xe4, 0xc4}, - {0xe5, 0xc5}, - {0xe5, 0x212b}, - {0xe6, 0xc6}, - {0xe7, 0xc7}, - {0xe8, 0xc8}, - {0xe9, 0xc9}, - {0xea, 0xca}, - {0xeb, 0xcb}, - {0xec, 0xcc}, - {0xed, 0xcd}, - {0xee, 0xce}, - {0xef, 0xcf}, - {0xf0, 0xd0}, - {0xf1, 0xd1}, - {0xf2, 0xd2}, - {0xf3, 0xd3}, - {0xf4, 0xd4}, - {0xf5, 0xd5}, - {0xf6, 0xd6}, - {0xf8, 0xd8}, - {0xf9, 0xd9}, - {0xfa, 0xda}, - {0xfb, 0xdb}, - {0xfc, 0xdc}, - {0xfd, 0xdd}, - {0xfe, 0xde}, - {0xff, 0x178}, - {0x100, 0x101}, - {0x101, 0x100}, - {0x102, 0x103}, - {0x103, 0x102}, - {0x104, 0x105}, - {0x105, 0x104}, - {0x106, 0x107}, - {0x107, 0x106}, - {0x108, 0x109}, - {0x109, 0x108}, - {0x10a, 0x10b}, - {0x10b, 0x10a}, - {0x10c, 0x10d}, - {0x10d, 0x10c}, - {0x10e, 0x10f}, - {0x10f, 0x10e}, - {0x110, 0x111}, - {0x111, 0x110}, - {0x112, 0x113}, - {0x113, 0x112}, - {0x114, 0x115}, - {0x115, 0x114}, - {0x116, 0x117}, - {0x117, 0x116}, - {0x118, 0x119}, - {0x119, 0x118}, - {0x11a, 0x11b}, - {0x11b, 0x11a}, - {0x11c, 0x11d}, - {0x11d, 0x11c}, - {0x11e, 0x11f}, - {0x11f, 0x11e}, - {0x120, 0x121}, - {0x121, 0x120}, - {0x122, 0x123}, - {0x123, 0x122}, - {0x124, 0x125}, - {0x125, 0x124}, - {0x126, 0x127}, - {0x127, 0x126}, - {0x128, 0x129}, - {0x129, 0x128}, - {0x12a, 0x12b}, - {0x12b, 0x12a}, - {0x12c, 0x12d}, - {0x12d, 0x12c}, - {0x12e, 0x12f}, - {0x12f, 0x12e}, - {0x132, 0x133}, - {0x133, 0x132}, - {0x134, 0x135}, - {0x135, 0x134}, - {0x136, 0x137}, - {0x137, 0x136}, - {0x139, 0x13a}, - {0x13a, 0x139}, - {0x13b, 0x13c}, - {0x13c, 0x13b}, - {0x13d, 0x13e}, - {0x13e, 0x13d}, - {0x13f, 0x140}, - {0x140, 0x13f}, - {0x141, 0x142}, - {0x142, 0x141}, - {0x143, 0x144}, - {0x144, 0x143}, - {0x145, 0x146}, - {0x146, 0x145}, - {0x147, 0x148}, - {0x148, 0x147}, - {0x14a, 0x14b}, - {0x14b, 0x14a}, - {0x14c, 0x14d}, - {0x14d, 0x14c}, - {0x14e, 0x14f}, - {0x14f, 0x14e}, - {0x150, 0x151}, - {0x151, 0x150}, - {0x152, 0x153}, - {0x153, 0x152}, - {0x154, 0x155}, - {0x155, 0x154}, - {0x156, 0x157}, - {0x157, 0x156}, - {0x158, 0x159}, - {0x159, 0x158}, - {0x15a, 0x15b}, - {0x15b, 0x15a}, - {0x15c, 0x15d}, - {0x15d, 0x15c}, - {0x15e, 0x15f}, - {0x15f, 0x15e}, - {0x160, 0x161}, - {0x161, 0x160}, - {0x162, 0x163}, - {0x163, 0x162}, - {0x164, 0x165}, - {0x165, 0x164}, - {0x166, 0x167}, - {0x167, 0x166}, - {0x168, 0x169}, - {0x169, 0x168}, - {0x16a, 0x16b}, - {0x16b, 0x16a}, - {0x16c, 0x16d}, - {0x16d, 0x16c}, - {0x16e, 0x16f}, - {0x16f, 0x16e}, - {0x170, 0x171}, - {0x171, 0x170}, - {0x172, 0x173}, - {0x173, 0x172}, - {0x174, 0x175}, - {0x175, 0x174}, - {0x176, 0x177}, - {0x177, 0x176}, - {0x178, 0xff}, - {0x179, 0x17a}, - {0x17a, 0x179}, - {0x17b, 0x17c}, - {0x17c, 0x17b}, - {0x17d, 0x17e}, - {0x17e, 0x17d}, - {0x17f, 0x53}, - {0x17f, 0x73}, - {0x180, 0x243}, - {0x181, 0x253}, - {0x182, 0x183}, - {0x183, 0x182}, - {0x184, 0x185}, - {0x185, 0x184}, - {0x186, 0x254}, - {0x187, 0x188}, - {0x188, 0x187}, - {0x189, 0x256}, - {0x18a, 0x257}, - {0x18b, 0x18c}, - {0x18c, 0x18b}, - {0x18e, 0x1dd}, - {0x18f, 0x259}, - {0x190, 0x25b}, - {0x191, 0x192}, - {0x192, 0x191}, - {0x193, 0x260}, - {0x194, 0x263}, - {0x195, 0x1f6}, - {0x196, 0x269}, - {0x197, 0x268}, - {0x198, 0x199}, - {0x199, 0x198}, - {0x19a, 0x23d}, - {0x19c, 0x26f}, - {0x19d, 0x272}, - {0x19e, 0x220}, - {0x19f, 0x275}, - {0x1a0, 0x1a1}, - {0x1a1, 0x1a0}, - {0x1a2, 0x1a3}, - {0x1a3, 0x1a2}, - {0x1a4, 0x1a5}, - {0x1a5, 0x1a4}, - {0x1a6, 0x280}, - {0x1a7, 0x1a8}, - {0x1a8, 0x1a7}, - {0x1a9, 0x283}, - {0x1ac, 0x1ad}, - {0x1ad, 0x1ac}, - {0x1ae, 0x288}, - {0x1af, 0x1b0}, - {0x1b0, 0x1af}, - {0x1b1, 0x28a}, - {0x1b2, 0x28b}, - {0x1b3, 0x1b4}, - {0x1b4, 0x1b3}, - {0x1b5, 0x1b6}, - {0x1b6, 0x1b5}, - {0x1b7, 0x292}, - {0x1b8, 0x1b9}, - {0x1b9, 0x1b8}, - {0x1bc, 0x1bd}, - {0x1bd, 0x1bc}, - {0x1bf, 0x1f7}, - {0x1c4, 0x1c5}, - {0x1c4, 0x1c6}, - {0x1c5, 0x1c4}, - {0x1c5, 0x1c6}, - {0x1c6, 0x1c4}, - {0x1c6, 0x1c5}, - {0x1c7, 0x1c8}, - {0x1c7, 0x1c9}, - {0x1c8, 0x1c7}, - {0x1c8, 0x1c9}, - {0x1c9, 0x1c7}, - {0x1c9, 0x1c8}, - {0x1ca, 0x1cb}, - {0x1ca, 0x1cc}, - {0x1cb, 0x1ca}, - {0x1cb, 0x1cc}, - {0x1cc, 0x1ca}, - {0x1cc, 0x1cb}, - {0x1cd, 0x1ce}, - {0x1ce, 0x1cd}, - {0x1cf, 0x1d0}, - {0x1d0, 0x1cf}, - {0x1d1, 0x1d2}, - {0x1d2, 0x1d1}, - {0x1d3, 0x1d4}, - {0x1d4, 0x1d3}, - {0x1d5, 0x1d6}, - {0x1d6, 0x1d5}, - {0x1d7, 0x1d8}, - {0x1d8, 0x1d7}, - {0x1d9, 0x1da}, - {0x1da, 0x1d9}, - {0x1db, 0x1dc}, - {0x1dc, 0x1db}, - {0x1dd, 0x18e}, - {0x1de, 0x1df}, - {0x1df, 0x1de}, - {0x1e0, 0x1e1}, - {0x1e1, 0x1e0}, - {0x1e2, 0x1e3}, - {0x1e3, 0x1e2}, - {0x1e4, 0x1e5}, - {0x1e5, 0x1e4}, - {0x1e6, 0x1e7}, - {0x1e7, 0x1e6}, - {0x1e8, 0x1e9}, - {0x1e9, 0x1e8}, - {0x1ea, 0x1eb}, - {0x1eb, 0x1ea}, - {0x1ec, 0x1ed}, - {0x1ed, 0x1ec}, - {0x1ee, 0x1ef}, - {0x1ef, 0x1ee}, - {0x1f1, 0x1f2}, - {0x1f1, 0x1f3}, - {0x1f2, 0x1f1}, - {0x1f2, 0x1f3}, - {0x1f3, 0x1f1}, - {0x1f3, 0x1f2}, - {0x1f4, 0x1f5}, - {0x1f5, 0x1f4}, - {0x1f6, 0x195}, - {0x1f7, 0x1bf}, - {0x1f8, 0x1f9}, - {0x1f9, 0x1f8}, - {0x1fa, 0x1fb}, - {0x1fb, 0x1fa}, - {0x1fc, 0x1fd}, - {0x1fd, 0x1fc}, - {0x1fe, 0x1ff}, - {0x1ff, 0x1fe}, - {0x200, 0x201}, - {0x201, 0x200}, - {0x202, 0x203}, - {0x203, 0x202}, - {0x204, 0x205}, - {0x205, 0x204}, - {0x206, 0x207}, - {0x207, 0x206}, - {0x208, 0x209}, - {0x209, 0x208}, - {0x20a, 0x20b}, - {0x20b, 0x20a}, - {0x20c, 0x20d}, - {0x20d, 0x20c}, - {0x20e, 0x20f}, - {0x20f, 0x20e}, - {0x210, 0x211}, - {0x211, 0x210}, - {0x212, 0x213}, - {0x213, 0x212}, - {0x214, 0x215}, - {0x215, 0x214}, - {0x216, 0x217}, - {0x217, 0x216}, - {0x218, 0x219}, - {0x219, 0x218}, - {0x21a, 0x21b}, - {0x21b, 0x21a}, - {0x21c, 0x21d}, - {0x21d, 0x21c}, - {0x21e, 0x21f}, - {0x21f, 0x21e}, - {0x220, 0x19e}, - {0x222, 0x223}, - {0x223, 0x222}, - {0x224, 0x225}, - {0x225, 0x224}, - {0x226, 0x227}, - {0x227, 0x226}, - {0x228, 0x229}, - {0x229, 0x228}, - {0x22a, 0x22b}, - {0x22b, 0x22a}, - {0x22c, 0x22d}, - {0x22d, 0x22c}, - {0x22e, 0x22f}, - {0x22f, 0x22e}, - {0x230, 0x231}, - {0x231, 0x230}, - {0x232, 0x233}, - {0x233, 0x232}, - {0x23a, 0x2c65}, - {0x23b, 0x23c}, - {0x23c, 0x23b}, - {0x23d, 0x19a}, - {0x23e, 0x2c66}, - {0x23f, 0x2c7e}, - {0x240, 0x2c7f}, - {0x241, 0x242}, - {0x242, 0x241}, - {0x243, 0x180}, - {0x244, 0x289}, - {0x245, 0x28c}, - {0x246, 0x247}, - {0x247, 0x246}, - {0x248, 0x249}, - {0x249, 0x248}, - {0x24a, 0x24b}, - {0x24b, 0x24a}, - {0x24c, 0x24d}, - {0x24d, 0x24c}, - {0x24e, 0x24f}, - {0x24f, 0x24e}, - {0x250, 0x2c6f}, - {0x251, 0x2c6d}, - {0x252, 0x2c70}, - {0x253, 0x181}, - {0x254, 0x186}, - {0x256, 0x189}, - {0x257, 0x18a}, - {0x259, 0x18f}, - {0x25b, 0x190}, - {0x25c, 0xa7ab}, - {0x260, 0x193}, - {0x261, 0xa7ac}, - {0x263, 0x194}, - {0x265, 0xa78d}, - {0x266, 0xa7aa}, - {0x268, 0x197}, - {0x269, 0x196}, - {0x26b, 0x2c62}, - {0x26c, 0xa7ad}, - {0x26f, 0x19c}, - {0x271, 0x2c6e}, - {0x272, 0x19d}, - {0x275, 0x19f}, - {0x27d, 0x2c64}, - {0x280, 0x1a6}, - {0x283, 0x1a9}, - {0x287, 0xa7b1}, - {0x288, 0x1ae}, - {0x289, 0x244}, - {0x28a, 0x1b1}, - {0x28b, 0x1b2}, - {0x28c, 0x245}, - {0x292, 0x1b7}, - {0x29e, 0xa7b0}, - {0x345, 0x399}, - {0x345, 0x3b9}, - {0x345, 0x1fbe}, - {0x370, 0x371}, - {0x371, 0x370}, - {0x372, 0x373}, - {0x373, 0x372}, - {0x376, 0x377}, - {0x377, 0x376}, - {0x37b, 0x3fd}, - {0x37c, 0x3fe}, - {0x37d, 0x3ff}, - {0x37f, 0x3f3}, - {0x386, 0x3ac}, - {0x388, 0x3ad}, - {0x389, 0x3ae}, - {0x38a, 0x3af}, - {0x38c, 0x3cc}, - {0x38e, 0x3cd}, - {0x38f, 0x3ce}, - {0x391, 0x3b1}, - {0x392, 0x3b2}, - {0x392, 0x3d0}, - {0x393, 0x3b3}, - {0x394, 0x3b4}, - {0x395, 0x3b5}, - {0x395, 0x3f5}, - {0x396, 0x3b6}, - {0x397, 0x3b7}, - {0x398, 0x3b8}, - {0x398, 0x3d1}, - {0x398, 0x3f4}, - {0x399, 0x345}, - {0x399, 0x3b9}, - {0x399, 0x1fbe}, - {0x39a, 0x3ba}, - {0x39a, 0x3f0}, - {0x39b, 0x3bb}, - {0x39c, 0xb5}, - {0x39c, 0x3bc}, - {0x39d, 0x3bd}, - {0x39e, 0x3be}, - {0x39f, 0x3bf}, - {0x3a0, 0x3c0}, - {0x3a0, 0x3d6}, - {0x3a1, 0x3c1}, - {0x3a1, 0x3f1}, - {0x3a3, 0x3c2}, - {0x3a3, 0x3c3}, - {0x3a4, 0x3c4}, - {0x3a5, 0x3c5}, - {0x3a6, 0x3c6}, - {0x3a6, 0x3d5}, - {0x3a7, 0x3c7}, - {0x3a8, 0x3c8}, - {0x3a9, 0x3c9}, - {0x3a9, 0x2126}, - {0x3aa, 0x3ca}, - {0x3ab, 0x3cb}, - {0x3ac, 0x386}, - {0x3ad, 0x388}, - {0x3ae, 0x389}, - {0x3af, 0x38a}, - {0x3b1, 0x391}, - {0x3b2, 0x392}, - {0x3b2, 0x3d0}, - {0x3b3, 0x393}, - {0x3b4, 0x394}, - {0x3b5, 0x395}, - {0x3b5, 0x3f5}, - {0x3b6, 0x396}, - {0x3b7, 0x397}, - {0x3b8, 0x398}, - {0x3b8, 0x3d1}, - {0x3b8, 0x3f4}, - {0x3b9, 0x345}, - {0x3b9, 0x399}, - {0x3b9, 0x1fbe}, - {0x3ba, 0x39a}, - {0x3ba, 0x3f0}, - {0x3bb, 0x39b}, - {0x3bc, 0xb5}, - {0x3bc, 0x39c}, - {0x3bd, 0x39d}, - {0x3be, 0x39e}, - {0x3bf, 0x39f}, - {0x3c0, 0x3a0}, - {0x3c0, 0x3d6}, - {0x3c1, 0x3a1}, - {0x3c1, 0x3f1}, - {0x3c2, 0x3a3}, - {0x3c2, 0x3c3}, - {0x3c3, 0x3a3}, - {0x3c3, 0x3c2}, - {0x3c4, 0x3a4}, - {0x3c5, 0x3a5}, - {0x3c6, 0x3a6}, - {0x3c6, 0x3d5}, - {0x3c7, 0x3a7}, - {0x3c8, 0x3a8}, - {0x3c9, 0x3a9}, - {0x3c9, 0x2126}, - {0x3ca, 0x3aa}, - {0x3cb, 0x3ab}, - {0x3cc, 0x38c}, - {0x3cd, 0x38e}, - {0x3ce, 0x38f}, - {0x3cf, 0x3d7}, - {0x3d0, 0x392}, - {0x3d0, 0x3b2}, - {0x3d1, 0x398}, - {0x3d1, 0x3b8}, - {0x3d1, 0x3f4}, - {0x3d5, 0x3a6}, - {0x3d5, 0x3c6}, - {0x3d6, 0x3a0}, - {0x3d6, 0x3c0}, - {0x3d7, 0x3cf}, - {0x3d8, 0x3d9}, - {0x3d9, 0x3d8}, - {0x3da, 0x3db}, - {0x3db, 0x3da}, - {0x3dc, 0x3dd}, - {0x3dd, 0x3dc}, - {0x3de, 0x3df}, - {0x3df, 0x3de}, - {0x3e0, 0x3e1}, - {0x3e1, 0x3e0}, - {0x3e2, 0x3e3}, - {0x3e3, 0x3e2}, - {0x3e4, 0x3e5}, - {0x3e5, 0x3e4}, - {0x3e6, 0x3e7}, - {0x3e7, 0x3e6}, - {0x3e8, 0x3e9}, - {0x3e9, 0x3e8}, - {0x3ea, 0x3eb}, - {0x3eb, 0x3ea}, - {0x3ec, 0x3ed}, - {0x3ed, 0x3ec}, - {0x3ee, 0x3ef}, - {0x3ef, 0x3ee}, - {0x3f0, 0x39a}, - {0x3f0, 0x3ba}, - {0x3f1, 0x3a1}, - {0x3f1, 0x3c1}, - {0x3f2, 0x3f9}, - {0x3f3, 0x37f}, - {0x3f4, 0x398}, - {0x3f4, 0x3b8}, - {0x3f4, 0x3d1}, - {0x3f5, 0x395}, - {0x3f5, 0x3b5}, - {0x3f7, 0x3f8}, - {0x3f8, 0x3f7}, - {0x3f9, 0x3f2}, - {0x3fa, 0x3fb}, - {0x3fb, 0x3fa}, - {0x3fd, 0x37b}, - {0x3fe, 0x37c}, - {0x3ff, 0x37d}, - {0x400, 0x450}, - {0x401, 0x451}, - {0x402, 0x452}, - {0x403, 0x453}, - {0x404, 0x454}, - {0x405, 0x455}, - {0x406, 0x456}, - {0x407, 0x457}, - {0x408, 0x458}, - {0x409, 0x459}, - {0x40a, 0x45a}, - {0x40b, 0x45b}, - {0x40c, 0x45c}, - {0x40d, 0x45d}, - {0x40e, 0x45e}, - {0x40f, 0x45f}, - {0x410, 0x430}, - {0x411, 0x431}, - {0x412, 0x432}, - {0x413, 0x433}, - {0x414, 0x434}, - {0x415, 0x435}, - {0x416, 0x436}, - {0x417, 0x437}, - {0x418, 0x438}, - {0x419, 0x439}, - {0x41a, 0x43a}, - {0x41b, 0x43b}, - {0x41c, 0x43c}, - {0x41d, 0x43d}, - {0x41e, 0x43e}, - {0x41f, 0x43f}, - {0x420, 0x440}, - {0x421, 0x441}, - {0x422, 0x442}, - {0x423, 0x443}, - {0x424, 0x444}, - {0x425, 0x445}, - {0x426, 0x446}, - {0x427, 0x447}, - {0x428, 0x448}, - {0x429, 0x449}, - {0x42a, 0x44a}, - {0x42b, 0x44b}, - {0x42c, 0x44c}, - {0x42d, 0x44d}, - {0x42e, 0x44e}, - {0x42f, 0x44f}, - {0x430, 0x410}, - {0x431, 0x411}, - {0x432, 0x412}, - {0x433, 0x413}, - {0x434, 0x414}, - {0x435, 0x415}, - {0x436, 0x416}, - {0x437, 0x417}, - {0x438, 0x418}, - {0x439, 0x419}, - {0x43a, 0x41a}, - {0x43b, 0x41b}, - {0x43c, 0x41c}, - {0x43d, 0x41d}, - {0x43e, 0x41e}, - {0x43f, 0x41f}, - {0x440, 0x420}, - {0x441, 0x421}, - {0x442, 0x422}, - {0x443, 0x423}, - {0x444, 0x424}, - {0x445, 0x425}, - {0x446, 0x426}, - {0x447, 0x427}, - {0x448, 0x428}, - {0x449, 0x429}, - {0x44a, 0x42a}, - {0x44b, 0x42b}, - {0x44c, 0x42c}, - {0x44d, 0x42d}, - {0x44e, 0x42e}, - {0x44f, 0x42f}, - {0x450, 0x400}, - {0x451, 0x401}, - {0x452, 0x402}, - {0x453, 0x403}, - {0x454, 0x404}, - {0x455, 0x405}, - {0x456, 0x406}, - {0x457, 0x407}, - {0x458, 0x408}, - {0x459, 0x409}, - {0x45a, 0x40a}, - {0x45b, 0x40b}, - {0x45c, 0x40c}, - {0x45d, 0x40d}, - {0x45e, 0x40e}, - {0x45f, 0x40f}, - {0x460, 0x461}, - {0x461, 0x460}, - {0x462, 0x463}, - {0x463, 0x462}, - {0x464, 0x465}, - {0x465, 0x464}, - {0x466, 0x467}, - {0x467, 0x466}, - {0x468, 0x469}, - {0x469, 0x468}, - {0x46a, 0x46b}, - {0x46b, 0x46a}, - {0x46c, 0x46d}, - {0x46d, 0x46c}, - {0x46e, 0x46f}, - {0x46f, 0x46e}, - {0x470, 0x471}, - {0x471, 0x470}, - {0x472, 0x473}, - {0x473, 0x472}, - {0x474, 0x475}, - {0x475, 0x474}, - {0x476, 0x477}, - {0x477, 0x476}, - {0x478, 0x479}, - {0x479, 0x478}, - {0x47a, 0x47b}, - {0x47b, 0x47a}, - {0x47c, 0x47d}, - {0x47d, 0x47c}, - {0x47e, 0x47f}, - {0x47f, 0x47e}, - {0x480, 0x481}, - {0x481, 0x480}, - {0x48a, 0x48b}, - {0x48b, 0x48a}, - {0x48c, 0x48d}, - {0x48d, 0x48c}, - {0x48e, 0x48f}, - {0x48f, 0x48e}, - {0x490, 0x491}, - {0x491, 0x490}, - {0x492, 0x493}, - {0x493, 0x492}, - {0x494, 0x495}, - {0x495, 0x494}, - {0x496, 0x497}, - {0x497, 0x496}, - {0x498, 0x499}, - {0x499, 0x498}, - {0x49a, 0x49b}, - {0x49b, 0x49a}, - {0x49c, 0x49d}, - {0x49d, 0x49c}, - {0x49e, 0x49f}, - {0x49f, 0x49e}, - {0x4a0, 0x4a1}, - {0x4a1, 0x4a0}, - {0x4a2, 0x4a3}, - {0x4a3, 0x4a2}, - {0x4a4, 0x4a5}, - {0x4a5, 0x4a4}, - {0x4a6, 0x4a7}, - {0x4a7, 0x4a6}, - {0x4a8, 0x4a9}, - {0x4a9, 0x4a8}, - {0x4aa, 0x4ab}, - {0x4ab, 0x4aa}, - {0x4ac, 0x4ad}, - {0x4ad, 0x4ac}, - {0x4ae, 0x4af}, - {0x4af, 0x4ae}, - {0x4b0, 0x4b1}, - {0x4b1, 0x4b0}, - {0x4b2, 0x4b3}, - {0x4b3, 0x4b2}, - {0x4b4, 0x4b5}, - {0x4b5, 0x4b4}, - {0x4b6, 0x4b7}, - {0x4b7, 0x4b6}, - {0x4b8, 0x4b9}, - {0x4b9, 0x4b8}, - {0x4ba, 0x4bb}, - {0x4bb, 0x4ba}, - {0x4bc, 0x4bd}, - {0x4bd, 0x4bc}, - {0x4be, 0x4bf}, - {0x4bf, 0x4be}, - {0x4c0, 0x4cf}, - {0x4c1, 0x4c2}, - {0x4c2, 0x4c1}, - {0x4c3, 0x4c4}, - {0x4c4, 0x4c3}, - {0x4c5, 0x4c6}, - {0x4c6, 0x4c5}, - {0x4c7, 0x4c8}, - {0x4c8, 0x4c7}, - {0x4c9, 0x4ca}, - {0x4ca, 0x4c9}, - {0x4cb, 0x4cc}, - {0x4cc, 0x4cb}, - {0x4cd, 0x4ce}, - {0x4ce, 0x4cd}, - {0x4cf, 0x4c0}, - {0x4d0, 0x4d1}, - {0x4d1, 0x4d0}, - {0x4d2, 0x4d3}, - {0x4d3, 0x4d2}, - {0x4d4, 0x4d5}, - {0x4d5, 0x4d4}, - {0x4d6, 0x4d7}, - {0x4d7, 0x4d6}, - {0x4d8, 0x4d9}, - {0x4d9, 0x4d8}, - {0x4da, 0x4db}, - {0x4db, 0x4da}, - {0x4dc, 0x4dd}, - {0x4dd, 0x4dc}, - {0x4de, 0x4df}, - {0x4df, 0x4de}, - {0x4e0, 0x4e1}, - {0x4e1, 0x4e0}, - {0x4e2, 0x4e3}, - {0x4e3, 0x4e2}, - {0x4e4, 0x4e5}, - {0x4e5, 0x4e4}, - {0x4e6, 0x4e7}, - {0x4e7, 0x4e6}, - {0x4e8, 0x4e9}, - {0x4e9, 0x4e8}, - {0x4ea, 0x4eb}, - {0x4eb, 0x4ea}, - {0x4ec, 0x4ed}, - {0x4ed, 0x4ec}, - {0x4ee, 0x4ef}, - {0x4ef, 0x4ee}, - {0x4f0, 0x4f1}, - {0x4f1, 0x4f0}, - {0x4f2, 0x4f3}, - {0x4f3, 0x4f2}, - {0x4f4, 0x4f5}, - {0x4f5, 0x4f4}, - {0x4f6, 0x4f7}, - {0x4f7, 0x4f6}, - {0x4f8, 0x4f9}, - {0x4f9, 0x4f8}, - {0x4fa, 0x4fb}, - {0x4fb, 0x4fa}, - {0x4fc, 0x4fd}, - {0x4fd, 0x4fc}, - {0x4fe, 0x4ff}, - {0x4ff, 0x4fe}, - {0x500, 0x501}, - {0x501, 0x500}, - {0x502, 0x503}, - {0x503, 0x502}, - {0x504, 0x505}, - {0x505, 0x504}, - {0x506, 0x507}, - {0x507, 0x506}, - {0x508, 0x509}, - {0x509, 0x508}, - {0x50a, 0x50b}, - {0x50b, 0x50a}, - {0x50c, 0x50d}, - {0x50d, 0x50c}, - {0x50e, 0x50f}, - {0x50f, 0x50e}, - {0x510, 0x511}, - {0x511, 0x510}, - {0x512, 0x513}, - {0x513, 0x512}, - {0x514, 0x515}, - {0x515, 0x514}, - {0x516, 0x517}, - {0x517, 0x516}, - {0x518, 0x519}, - {0x519, 0x518}, - {0x51a, 0x51b}, - {0x51b, 0x51a}, - {0x51c, 0x51d}, - {0x51d, 0x51c}, - {0x51e, 0x51f}, - {0x51f, 0x51e}, - {0x520, 0x521}, - {0x521, 0x520}, - {0x522, 0x523}, - {0x523, 0x522}, - {0x524, 0x525}, - {0x525, 0x524}, - {0x526, 0x527}, - {0x527, 0x526}, - {0x528, 0x529}, - {0x529, 0x528}, - {0x52a, 0x52b}, - {0x52b, 0x52a}, - {0x52c, 0x52d}, - {0x52d, 0x52c}, - {0x52e, 0x52f}, - {0x52f, 0x52e}, - {0x531, 0x561}, - {0x532, 0x562}, - {0x533, 0x563}, - {0x534, 0x564}, - {0x535, 0x565}, - {0x536, 0x566}, - {0x537, 0x567}, - {0x538, 0x568}, - {0x539, 0x569}, - {0x53a, 0x56a}, - {0x53b, 0x56b}, - {0x53c, 0x56c}, - {0x53d, 0x56d}, - {0x53e, 0x56e}, - {0x53f, 0x56f}, - {0x540, 0x570}, - {0x541, 0x571}, - {0x542, 0x572}, - {0x543, 0x573}, - {0x544, 0x574}, - {0x545, 0x575}, - {0x546, 0x576}, - {0x547, 0x577}, - {0x548, 0x578}, - {0x549, 0x579}, - {0x54a, 0x57a}, - {0x54b, 0x57b}, - {0x54c, 0x57c}, - {0x54d, 0x57d}, - {0x54e, 0x57e}, - {0x54f, 0x57f}, - {0x550, 0x580}, - {0x551, 0x581}, - {0x552, 0x582}, - {0x553, 0x583}, - {0x554, 0x584}, - {0x555, 0x585}, - {0x556, 0x586}, - {0x561, 0x531}, - {0x562, 0x532}, - {0x563, 0x533}, - {0x564, 0x534}, - {0x565, 0x535}, - {0x566, 0x536}, - {0x567, 0x537}, - {0x568, 0x538}, - {0x569, 0x539}, - {0x56a, 0x53a}, - {0x56b, 0x53b}, - {0x56c, 0x53c}, - {0x56d, 0x53d}, - {0x56e, 0x53e}, - {0x56f, 0x53f}, - {0x570, 0x540}, - {0x571, 0x541}, - {0x572, 0x542}, - {0x573, 0x543}, - {0x574, 0x544}, - {0x575, 0x545}, - {0x576, 0x546}, - {0x577, 0x547}, - {0x578, 0x548}, - {0x579, 0x549}, - {0x57a, 0x54a}, - {0x57b, 0x54b}, - {0x57c, 0x54c}, - {0x57d, 0x54d}, - {0x57e, 0x54e}, - {0x57f, 0x54f}, - {0x580, 0x550}, - {0x581, 0x551}, - {0x582, 0x552}, - {0x583, 0x553}, - {0x584, 0x554}, - {0x585, 0x555}, - {0x586, 0x556}, - {0x10a0, 0x2d00}, - {0x10a1, 0x2d01}, - {0x10a2, 0x2d02}, - {0x10a3, 0x2d03}, - {0x10a4, 0x2d04}, - {0x10a5, 0x2d05}, - {0x10a6, 0x2d06}, - {0x10a7, 0x2d07}, - {0x10a8, 0x2d08}, - {0x10a9, 0x2d09}, - {0x10aa, 0x2d0a}, - {0x10ab, 0x2d0b}, - {0x10ac, 0x2d0c}, - {0x10ad, 0x2d0d}, - {0x10ae, 0x2d0e}, - {0x10af, 0x2d0f}, - {0x10b0, 0x2d10}, - {0x10b1, 0x2d11}, - {0x10b2, 0x2d12}, - {0x10b3, 0x2d13}, - {0x10b4, 0x2d14}, - {0x10b5, 0x2d15}, - {0x10b6, 0x2d16}, - {0x10b7, 0x2d17}, - {0x10b8, 0x2d18}, - {0x10b9, 0x2d19}, - {0x10ba, 0x2d1a}, - {0x10bb, 0x2d1b}, - {0x10bc, 0x2d1c}, - {0x10bd, 0x2d1d}, - {0x10be, 0x2d1e}, - {0x10bf, 0x2d1f}, - {0x10c0, 0x2d20}, - {0x10c1, 0x2d21}, - {0x10c2, 0x2d22}, - {0x10c3, 0x2d23}, - {0x10c4, 0x2d24}, - {0x10c5, 0x2d25}, - {0x10c7, 0x2d27}, - {0x10cd, 0x2d2d}, - {0x1d79, 0xa77d}, - {0x1d7d, 0x2c63}, - {0x1e00, 0x1e01}, - {0x1e01, 0x1e00}, - {0x1e02, 0x1e03}, - {0x1e03, 0x1e02}, - {0x1e04, 0x1e05}, - {0x1e05, 0x1e04}, - {0x1e06, 0x1e07}, - {0x1e07, 0x1e06}, - {0x1e08, 0x1e09}, - {0x1e09, 0x1e08}, - {0x1e0a, 0x1e0b}, - {0x1e0b, 0x1e0a}, - {0x1e0c, 0x1e0d}, - {0x1e0d, 0x1e0c}, - {0x1e0e, 0x1e0f}, - {0x1e0f, 0x1e0e}, - {0x1e10, 0x1e11}, - {0x1e11, 0x1e10}, - {0x1e12, 0x1e13}, - {0x1e13, 0x1e12}, - {0x1e14, 0x1e15}, - {0x1e15, 0x1e14}, - {0x1e16, 0x1e17}, - {0x1e17, 0x1e16}, - {0x1e18, 0x1e19}, - {0x1e19, 0x1e18}, - {0x1e1a, 0x1e1b}, - {0x1e1b, 0x1e1a}, - {0x1e1c, 0x1e1d}, - {0x1e1d, 0x1e1c}, - {0x1e1e, 0x1e1f}, - {0x1e1f, 0x1e1e}, - {0x1e20, 0x1e21}, - {0x1e21, 0x1e20}, - {0x1e22, 0x1e23}, - {0x1e23, 0x1e22}, - {0x1e24, 0x1e25}, - {0x1e25, 0x1e24}, - {0x1e26, 0x1e27}, - {0x1e27, 0x1e26}, - {0x1e28, 0x1e29}, - {0x1e29, 0x1e28}, - {0x1e2a, 0x1e2b}, - {0x1e2b, 0x1e2a}, - {0x1e2c, 0x1e2d}, - {0x1e2d, 0x1e2c}, - {0x1e2e, 0x1e2f}, - {0x1e2f, 0x1e2e}, - {0x1e30, 0x1e31}, - {0x1e31, 0x1e30}, - {0x1e32, 0x1e33}, - {0x1e33, 0x1e32}, - {0x1e34, 0x1e35}, - {0x1e35, 0x1e34}, - {0x1e36, 0x1e37}, - {0x1e37, 0x1e36}, - {0x1e38, 0x1e39}, - {0x1e39, 0x1e38}, - {0x1e3a, 0x1e3b}, - {0x1e3b, 0x1e3a}, - {0x1e3c, 0x1e3d}, - {0x1e3d, 0x1e3c}, - {0x1e3e, 0x1e3f}, - {0x1e3f, 0x1e3e}, - {0x1e40, 0x1e41}, - {0x1e41, 0x1e40}, - {0x1e42, 0x1e43}, - {0x1e43, 0x1e42}, - {0x1e44, 0x1e45}, - {0x1e45, 0x1e44}, - {0x1e46, 0x1e47}, - {0x1e47, 0x1e46}, - {0x1e48, 0x1e49}, - {0x1e49, 0x1e48}, - {0x1e4a, 0x1e4b}, - {0x1e4b, 0x1e4a}, - {0x1e4c, 0x1e4d}, - {0x1e4d, 0x1e4c}, - {0x1e4e, 0x1e4f}, - {0x1e4f, 0x1e4e}, - {0x1e50, 0x1e51}, - {0x1e51, 0x1e50}, - {0x1e52, 0x1e53}, - {0x1e53, 0x1e52}, - {0x1e54, 0x1e55}, - {0x1e55, 0x1e54}, - {0x1e56, 0x1e57}, - {0x1e57, 0x1e56}, - {0x1e58, 0x1e59}, - {0x1e59, 0x1e58}, - {0x1e5a, 0x1e5b}, - {0x1e5b, 0x1e5a}, - {0x1e5c, 0x1e5d}, - {0x1e5d, 0x1e5c}, - {0x1e5e, 0x1e5f}, - {0x1e5f, 0x1e5e}, - {0x1e60, 0x1e61}, - {0x1e60, 0x1e9b}, - {0x1e61, 0x1e60}, - {0x1e61, 0x1e9b}, - {0x1e62, 0x1e63}, - {0x1e63, 0x1e62}, - {0x1e64, 0x1e65}, - {0x1e65, 0x1e64}, - {0x1e66, 0x1e67}, - {0x1e67, 0x1e66}, - {0x1e68, 0x1e69}, - {0x1e69, 0x1e68}, - {0x1e6a, 0x1e6b}, - {0x1e6b, 0x1e6a}, - {0x1e6c, 0x1e6d}, - {0x1e6d, 0x1e6c}, - {0x1e6e, 0x1e6f}, - {0x1e6f, 0x1e6e}, - {0x1e70, 0x1e71}, - {0x1e71, 0x1e70}, - {0x1e72, 0x1e73}, - {0x1e73, 0x1e72}, - {0x1e74, 0x1e75}, - {0x1e75, 0x1e74}, - {0x1e76, 0x1e77}, - {0x1e77, 0x1e76}, - {0x1e78, 0x1e79}, - {0x1e79, 0x1e78}, - {0x1e7a, 0x1e7b}, - {0x1e7b, 0x1e7a}, - {0x1e7c, 0x1e7d}, - {0x1e7d, 0x1e7c}, - {0x1e7e, 0x1e7f}, - {0x1e7f, 0x1e7e}, - {0x1e80, 0x1e81}, - {0x1e81, 0x1e80}, - {0x1e82, 0x1e83}, - {0x1e83, 0x1e82}, - {0x1e84, 0x1e85}, - {0x1e85, 0x1e84}, - {0x1e86, 0x1e87}, - {0x1e87, 0x1e86}, - {0x1e88, 0x1e89}, - {0x1e89, 0x1e88}, - {0x1e8a, 0x1e8b}, - {0x1e8b, 0x1e8a}, - {0x1e8c, 0x1e8d}, - {0x1e8d, 0x1e8c}, - {0x1e8e, 0x1e8f}, - {0x1e8f, 0x1e8e}, - {0x1e90, 0x1e91}, - {0x1e91, 0x1e90}, - {0x1e92, 0x1e93}, - {0x1e93, 0x1e92}, - {0x1e94, 0x1e95}, - {0x1e95, 0x1e94}, - {0x1e9b, 0x1e60}, - {0x1e9b, 0x1e61}, - {0x1e9e, 0xdf}, - {0x1ea0, 0x1ea1}, - {0x1ea1, 0x1ea0}, - {0x1ea2, 0x1ea3}, - {0x1ea3, 0x1ea2}, - {0x1ea4, 0x1ea5}, - {0x1ea5, 0x1ea4}, - {0x1ea6, 0x1ea7}, - {0x1ea7, 0x1ea6}, - {0x1ea8, 0x1ea9}, - {0x1ea9, 0x1ea8}, - {0x1eaa, 0x1eab}, - {0x1eab, 0x1eaa}, - {0x1eac, 0x1ead}, - {0x1ead, 0x1eac}, - {0x1eae, 0x1eaf}, - {0x1eaf, 0x1eae}, - {0x1eb0, 0x1eb1}, - {0x1eb1, 0x1eb0}, - {0x1eb2, 0x1eb3}, - {0x1eb3, 0x1eb2}, - {0x1eb4, 0x1eb5}, - {0x1eb5, 0x1eb4}, - {0x1eb6, 0x1eb7}, - {0x1eb7, 0x1eb6}, - {0x1eb8, 0x1eb9}, - {0x1eb9, 0x1eb8}, - {0x1eba, 0x1ebb}, - {0x1ebb, 0x1eba}, - {0x1ebc, 0x1ebd}, - {0x1ebd, 0x1ebc}, - {0x1ebe, 0x1ebf}, - {0x1ebf, 0x1ebe}, - {0x1ec0, 0x1ec1}, - {0x1ec1, 0x1ec0}, - {0x1ec2, 0x1ec3}, - {0x1ec3, 0x1ec2}, - {0x1ec4, 0x1ec5}, - {0x1ec5, 0x1ec4}, - {0x1ec6, 0x1ec7}, - {0x1ec7, 0x1ec6}, - {0x1ec8, 0x1ec9}, - {0x1ec9, 0x1ec8}, - {0x1eca, 0x1ecb}, - {0x1ecb, 0x1eca}, - {0x1ecc, 0x1ecd}, - {0x1ecd, 0x1ecc}, - {0x1ece, 0x1ecf}, - {0x1ecf, 0x1ece}, - {0x1ed0, 0x1ed1}, - {0x1ed1, 0x1ed0}, - {0x1ed2, 0x1ed3}, - {0x1ed3, 0x1ed2}, - {0x1ed4, 0x1ed5}, - {0x1ed5, 0x1ed4}, - {0x1ed6, 0x1ed7}, - {0x1ed7, 0x1ed6}, - {0x1ed8, 0x1ed9}, - {0x1ed9, 0x1ed8}, - {0x1eda, 0x1edb}, - {0x1edb, 0x1eda}, - {0x1edc, 0x1edd}, - {0x1edd, 0x1edc}, - {0x1ede, 0x1edf}, - {0x1edf, 0x1ede}, - {0x1ee0, 0x1ee1}, - {0x1ee1, 0x1ee0}, - {0x1ee2, 0x1ee3}, - {0x1ee3, 0x1ee2}, - {0x1ee4, 0x1ee5}, - {0x1ee5, 0x1ee4}, - {0x1ee6, 0x1ee7}, - {0x1ee7, 0x1ee6}, - {0x1ee8, 0x1ee9}, - {0x1ee9, 0x1ee8}, - {0x1eea, 0x1eeb}, - {0x1eeb, 0x1eea}, - {0x1eec, 0x1eed}, - {0x1eed, 0x1eec}, - {0x1eee, 0x1eef}, - {0x1eef, 0x1eee}, - {0x1ef0, 0x1ef1}, - {0x1ef1, 0x1ef0}, - {0x1ef2, 0x1ef3}, - {0x1ef3, 0x1ef2}, - {0x1ef4, 0x1ef5}, - {0x1ef5, 0x1ef4}, - {0x1ef6, 0x1ef7}, - {0x1ef7, 0x1ef6}, - {0x1ef8, 0x1ef9}, - {0x1ef9, 0x1ef8}, - {0x1efa, 0x1efb}, - {0x1efb, 0x1efa}, - {0x1efc, 0x1efd}, - {0x1efd, 0x1efc}, - {0x1efe, 0x1eff}, - {0x1eff, 0x1efe}, - {0x1f00, 0x1f08}, - {0x1f01, 0x1f09}, - {0x1f02, 0x1f0a}, - {0x1f03, 0x1f0b}, - {0x1f04, 0x1f0c}, - {0x1f05, 0x1f0d}, - {0x1f06, 0x1f0e}, - {0x1f07, 0x1f0f}, - {0x1f08, 0x1f00}, - {0x1f09, 0x1f01}, - {0x1f0a, 0x1f02}, - {0x1f0b, 0x1f03}, - {0x1f0c, 0x1f04}, - {0x1f0d, 0x1f05}, - {0x1f0e, 0x1f06}, - {0x1f0f, 0x1f07}, - {0x1f10, 0x1f18}, - {0x1f11, 0x1f19}, - {0x1f12, 0x1f1a}, - {0x1f13, 0x1f1b}, - {0x1f14, 0x1f1c}, - {0x1f15, 0x1f1d}, - {0x1f18, 0x1f10}, - {0x1f19, 0x1f11}, - {0x1f1a, 0x1f12}, - {0x1f1b, 0x1f13}, - {0x1f1c, 0x1f14}, - {0x1f1d, 0x1f15}, - {0x1f20, 0x1f28}, - {0x1f21, 0x1f29}, - {0x1f22, 0x1f2a}, - {0x1f23, 0x1f2b}, - {0x1f24, 0x1f2c}, - {0x1f25, 0x1f2d}, - {0x1f26, 0x1f2e}, - {0x1f27, 0x1f2f}, - {0x1f28, 0x1f20}, - {0x1f29, 0x1f21}, - {0x1f2a, 0x1f22}, - {0x1f2b, 0x1f23}, - {0x1f2c, 0x1f24}, - {0x1f2d, 0x1f25}, - {0x1f2e, 0x1f26}, - {0x1f2f, 0x1f27}, - {0x1f30, 0x1f38}, - {0x1f31, 0x1f39}, - {0x1f32, 0x1f3a}, - {0x1f33, 0x1f3b}, - {0x1f34, 0x1f3c}, - {0x1f35, 0x1f3d}, - {0x1f36, 0x1f3e}, - {0x1f37, 0x1f3f}, - {0x1f38, 0x1f30}, - {0x1f39, 0x1f31}, - {0x1f3a, 0x1f32}, - {0x1f3b, 0x1f33}, - {0x1f3c, 0x1f34}, - {0x1f3d, 0x1f35}, - {0x1f3e, 0x1f36}, - {0x1f3f, 0x1f37}, - {0x1f40, 0x1f48}, - {0x1f41, 0x1f49}, - {0x1f42, 0x1f4a}, - {0x1f43, 0x1f4b}, - {0x1f44, 0x1f4c}, - {0x1f45, 0x1f4d}, - {0x1f48, 0x1f40}, - {0x1f49, 0x1f41}, - {0x1f4a, 0x1f42}, - {0x1f4b, 0x1f43}, - {0x1f4c, 0x1f44}, - {0x1f4d, 0x1f45}, - {0x1f51, 0x1f59}, - {0x1f53, 0x1f5b}, - {0x1f55, 0x1f5d}, - {0x1f57, 0x1f5f}, - {0x1f59, 0x1f51}, - {0x1f5b, 0x1f53}, - {0x1f5d, 0x1f55}, - {0x1f5f, 0x1f57}, - {0x1f60, 0x1f68}, - {0x1f61, 0x1f69}, - {0x1f62, 0x1f6a}, - {0x1f63, 0x1f6b}, - {0x1f64, 0x1f6c}, - {0x1f65, 0x1f6d}, - {0x1f66, 0x1f6e}, - {0x1f67, 0x1f6f}, - {0x1f68, 0x1f60}, - {0x1f69, 0x1f61}, - {0x1f6a, 0x1f62}, - {0x1f6b, 0x1f63}, - {0x1f6c, 0x1f64}, - {0x1f6d, 0x1f65}, - {0x1f6e, 0x1f66}, - {0x1f6f, 0x1f67}, - {0x1f70, 0x1fba}, - {0x1f71, 0x1fbb}, - {0x1f72, 0x1fc8}, - {0x1f73, 0x1fc9}, - {0x1f74, 0x1fca}, - {0x1f75, 0x1fcb}, - {0x1f76, 0x1fda}, - {0x1f77, 0x1fdb}, - {0x1f78, 0x1ff8}, - {0x1f79, 0x1ff9}, - {0x1f7a, 0x1fea}, - {0x1f7b, 0x1feb}, - {0x1f7c, 0x1ffa}, - {0x1f7d, 0x1ffb}, - {0x1f80, 0x1f88}, - {0x1f81, 0x1f89}, - {0x1f82, 0x1f8a}, - {0x1f83, 0x1f8b}, - {0x1f84, 0x1f8c}, - {0x1f85, 0x1f8d}, - {0x1f86, 0x1f8e}, - {0x1f87, 0x1f8f}, - {0x1f88, 0x1f80}, - {0x1f89, 0x1f81}, - {0x1f8a, 0x1f82}, - {0x1f8b, 0x1f83}, - {0x1f8c, 0x1f84}, - {0x1f8d, 0x1f85}, - {0x1f8e, 0x1f86}, - {0x1f8f, 0x1f87}, - {0x1f90, 0x1f98}, - {0x1f91, 0x1f99}, - {0x1f92, 0x1f9a}, - {0x1f93, 0x1f9b}, - {0x1f94, 0x1f9c}, - {0x1f95, 0x1f9d}, - {0x1f96, 0x1f9e}, - {0x1f97, 0x1f9f}, - {0x1f98, 0x1f90}, - {0x1f99, 0x1f91}, - {0x1f9a, 0x1f92}, - {0x1f9b, 0x1f93}, - {0x1f9c, 0x1f94}, - {0x1f9d, 0x1f95}, - {0x1f9e, 0x1f96}, - {0x1f9f, 0x1f97}, - {0x1fa0, 0x1fa8}, - {0x1fa1, 0x1fa9}, - {0x1fa2, 0x1faa}, - {0x1fa3, 0x1fab}, - {0x1fa4, 0x1fac}, - {0x1fa5, 0x1fad}, - {0x1fa6, 0x1fae}, - {0x1fa7, 0x1faf}, - {0x1fa8, 0x1fa0}, - {0x1fa9, 0x1fa1}, - {0x1faa, 0x1fa2}, - {0x1fab, 0x1fa3}, - {0x1fac, 0x1fa4}, - {0x1fad, 0x1fa5}, - {0x1fae, 0x1fa6}, - {0x1faf, 0x1fa7}, - {0x1fb0, 0x1fb8}, - {0x1fb1, 0x1fb9}, - {0x1fb3, 0x1fbc}, - {0x1fb8, 0x1fb0}, - {0x1fb9, 0x1fb1}, - {0x1fba, 0x1f70}, - {0x1fbb, 0x1f71}, - {0x1fbc, 0x1fb3}, - {0x1fbe, 0x345}, - {0x1fbe, 0x399}, - {0x1fbe, 0x3b9}, - {0x1fc3, 0x1fcc}, - {0x1fc8, 0x1f72}, - {0x1fc9, 0x1f73}, - {0x1fca, 0x1f74}, - {0x1fcb, 0x1f75}, - {0x1fcc, 0x1fc3}, - {0x1fd0, 0x1fd8}, - {0x1fd1, 0x1fd9}, - {0x1fd8, 0x1fd0}, - {0x1fd9, 0x1fd1}, - {0x1fda, 0x1f76}, - {0x1fdb, 0x1f77}, - {0x1fe0, 0x1fe8}, - {0x1fe1, 0x1fe9}, - {0x1fe5, 0x1fec}, - {0x1fe8, 0x1fe0}, - {0x1fe9, 0x1fe1}, - {0x1fea, 0x1f7a}, - {0x1feb, 0x1f7b}, - {0x1fec, 0x1fe5}, - {0x1ff3, 0x1ffc}, - {0x1ff8, 0x1f78}, - {0x1ff9, 0x1f79}, - {0x1ffa, 0x1f7c}, - {0x1ffb, 0x1f7d}, - {0x1ffc, 0x1ff3}, - {0x2126, 0x3a9}, - {0x2126, 0x3c9}, - {0x212a, 0x4b}, - {0x212a, 0x6b}, - {0x212b, 0xc5}, - {0x212b, 0xe5}, - {0x2132, 0x214e}, - {0x214e, 0x2132}, - {0x2160, 0x2170}, - {0x2161, 0x2171}, - {0x2162, 0x2172}, - {0x2163, 0x2173}, - {0x2164, 0x2174}, - {0x2165, 0x2175}, - {0x2166, 0x2176}, - {0x2167, 0x2177}, - {0x2168, 0x2178}, - {0x2169, 0x2179}, - {0x216a, 0x217a}, - {0x216b, 0x217b}, - {0x216c, 0x217c}, - {0x216d, 0x217d}, - {0x216e, 0x217e}, - {0x216f, 0x217f}, - {0x2170, 0x2160}, - {0x2171, 0x2161}, - {0x2172, 0x2162}, - {0x2173, 0x2163}, - {0x2174, 0x2164}, - {0x2175, 0x2165}, - {0x2176, 0x2166}, - {0x2177, 0x2167}, - {0x2178, 0x2168}, - {0x2179, 0x2169}, - {0x217a, 0x216a}, - {0x217b, 0x216b}, - {0x217c, 0x216c}, - {0x217d, 0x216d}, - {0x217e, 0x216e}, - {0x217f, 0x216f}, - {0x2183, 0x2184}, - {0x2184, 0x2183}, - {0x24b6, 0x24d0}, - {0x24b7, 0x24d1}, - {0x24b8, 0x24d2}, - {0x24b9, 0x24d3}, - {0x24ba, 0x24d4}, - {0x24bb, 0x24d5}, - {0x24bc, 0x24d6}, - {0x24bd, 0x24d7}, - {0x24be, 0x24d8}, - {0x24bf, 0x24d9}, - {0x24c0, 0x24da}, - {0x24c1, 0x24db}, - {0x24c2, 0x24dc}, - {0x24c3, 0x24dd}, - {0x24c4, 0x24de}, - {0x24c5, 0x24df}, - {0x24c6, 0x24e0}, - {0x24c7, 0x24e1}, - {0x24c8, 0x24e2}, - {0x24c9, 0x24e3}, - {0x24ca, 0x24e4}, - {0x24cb, 0x24e5}, - {0x24cc, 0x24e6}, - {0x24cd, 0x24e7}, - {0x24ce, 0x24e8}, - {0x24cf, 0x24e9}, - {0x24d0, 0x24b6}, - {0x24d1, 0x24b7}, - {0x24d2, 0x24b8}, - {0x24d3, 0x24b9}, - {0x24d4, 0x24ba}, - {0x24d5, 0x24bb}, - {0x24d6, 0x24bc}, - {0x24d7, 0x24bd}, - {0x24d8, 0x24be}, - {0x24d9, 0x24bf}, - {0x24da, 0x24c0}, - {0x24db, 0x24c1}, - {0x24dc, 0x24c2}, - {0x24dd, 0x24c3}, - {0x24de, 0x24c4}, - {0x24df, 0x24c5}, - {0x24e0, 0x24c6}, - {0x24e1, 0x24c7}, - {0x24e2, 0x24c8}, - {0x24e3, 0x24c9}, - {0x24e4, 0x24ca}, - {0x24e5, 0x24cb}, - {0x24e6, 0x24cc}, - {0x24e7, 0x24cd}, - {0x24e8, 0x24ce}, - {0x24e9, 0x24cf}, - {0x2c00, 0x2c30}, - {0x2c01, 0x2c31}, - {0x2c02, 0x2c32}, - {0x2c03, 0x2c33}, - {0x2c04, 0x2c34}, - {0x2c05, 0x2c35}, - {0x2c06, 0x2c36}, - {0x2c07, 0x2c37}, - {0x2c08, 0x2c38}, - {0x2c09, 0x2c39}, - {0x2c0a, 0x2c3a}, - {0x2c0b, 0x2c3b}, - {0x2c0c, 0x2c3c}, - {0x2c0d, 0x2c3d}, - {0x2c0e, 0x2c3e}, - {0x2c0f, 0x2c3f}, - {0x2c10, 0x2c40}, - {0x2c11, 0x2c41}, - {0x2c12, 0x2c42}, - {0x2c13, 0x2c43}, - {0x2c14, 0x2c44}, - {0x2c15, 0x2c45}, - {0x2c16, 0x2c46}, - {0x2c17, 0x2c47}, - {0x2c18, 0x2c48}, - {0x2c19, 0x2c49}, - {0x2c1a, 0x2c4a}, - {0x2c1b, 0x2c4b}, - {0x2c1c, 0x2c4c}, - {0x2c1d, 0x2c4d}, - {0x2c1e, 0x2c4e}, - {0x2c1f, 0x2c4f}, - {0x2c20, 0x2c50}, - {0x2c21, 0x2c51}, - {0x2c22, 0x2c52}, - {0x2c23, 0x2c53}, - {0x2c24, 0x2c54}, - {0x2c25, 0x2c55}, - {0x2c26, 0x2c56}, - {0x2c27, 0x2c57}, - {0x2c28, 0x2c58}, - {0x2c29, 0x2c59}, - {0x2c2a, 0x2c5a}, - {0x2c2b, 0x2c5b}, - {0x2c2c, 0x2c5c}, - {0x2c2d, 0x2c5d}, - {0x2c2e, 0x2c5e}, - {0x2c30, 0x2c00}, - {0x2c31, 0x2c01}, - {0x2c32, 0x2c02}, - {0x2c33, 0x2c03}, - {0x2c34, 0x2c04}, - {0x2c35, 0x2c05}, - {0x2c36, 0x2c06}, - {0x2c37, 0x2c07}, - {0x2c38, 0x2c08}, - {0x2c39, 0x2c09}, - {0x2c3a, 0x2c0a}, - {0x2c3b, 0x2c0b}, - {0x2c3c, 0x2c0c}, - {0x2c3d, 0x2c0d}, - {0x2c3e, 0x2c0e}, - {0x2c3f, 0x2c0f}, - {0x2c40, 0x2c10}, - {0x2c41, 0x2c11}, - {0x2c42, 0x2c12}, - {0x2c43, 0x2c13}, - {0x2c44, 0x2c14}, - {0x2c45, 0x2c15}, - {0x2c46, 0x2c16}, - {0x2c47, 0x2c17}, - {0x2c48, 0x2c18}, - {0x2c49, 0x2c19}, - {0x2c4a, 0x2c1a}, - {0x2c4b, 0x2c1b}, - {0x2c4c, 0x2c1c}, - {0x2c4d, 0x2c1d}, - {0x2c4e, 0x2c1e}, - {0x2c4f, 0x2c1f}, - {0x2c50, 0x2c20}, - {0x2c51, 0x2c21}, - {0x2c52, 0x2c22}, - {0x2c53, 0x2c23}, - {0x2c54, 0x2c24}, - {0x2c55, 0x2c25}, - {0x2c56, 0x2c26}, - {0x2c57, 0x2c27}, - {0x2c58, 0x2c28}, - {0x2c59, 0x2c29}, - {0x2c5a, 0x2c2a}, - {0x2c5b, 0x2c2b}, - {0x2c5c, 0x2c2c}, - {0x2c5d, 0x2c2d}, - {0x2c5e, 0x2c2e}, - {0x2c60, 0x2c61}, - {0x2c61, 0x2c60}, - {0x2c62, 0x26b}, - {0x2c63, 0x1d7d}, - {0x2c64, 0x27d}, - {0x2c65, 0x23a}, - {0x2c66, 0x23e}, - {0x2c67, 0x2c68}, - {0x2c68, 0x2c67}, - {0x2c69, 0x2c6a}, - {0x2c6a, 0x2c69}, - {0x2c6b, 0x2c6c}, - {0x2c6c, 0x2c6b}, - {0x2c6d, 0x251}, - {0x2c6e, 0x271}, - {0x2c6f, 0x250}, - {0x2c70, 0x252}, - {0x2c72, 0x2c73}, - {0x2c73, 0x2c72}, - {0x2c75, 0x2c76}, - {0x2c76, 0x2c75}, - {0x2c7e, 0x23f}, - {0x2c7f, 0x240}, - {0x2c80, 0x2c81}, - {0x2c81, 0x2c80}, - {0x2c82, 0x2c83}, - {0x2c83, 0x2c82}, - {0x2c84, 0x2c85}, - {0x2c85, 0x2c84}, - {0x2c86, 0x2c87}, - {0x2c87, 0x2c86}, - {0x2c88, 0x2c89}, - {0x2c89, 0x2c88}, - {0x2c8a, 0x2c8b}, - {0x2c8b, 0x2c8a}, - {0x2c8c, 0x2c8d}, - {0x2c8d, 0x2c8c}, - {0x2c8e, 0x2c8f}, - {0x2c8f, 0x2c8e}, - {0x2c90, 0x2c91}, - {0x2c91, 0x2c90}, - {0x2c92, 0x2c93}, - {0x2c93, 0x2c92}, - {0x2c94, 0x2c95}, - {0x2c95, 0x2c94}, - {0x2c96, 0x2c97}, - {0x2c97, 0x2c96}, - {0x2c98, 0x2c99}, - {0x2c99, 0x2c98}, - {0x2c9a, 0x2c9b}, - {0x2c9b, 0x2c9a}, - {0x2c9c, 0x2c9d}, - {0x2c9d, 0x2c9c}, - {0x2c9e, 0x2c9f}, - {0x2c9f, 0x2c9e}, - {0x2ca0, 0x2ca1}, - {0x2ca1, 0x2ca0}, - {0x2ca2, 0x2ca3}, - {0x2ca3, 0x2ca2}, - {0x2ca4, 0x2ca5}, - {0x2ca5, 0x2ca4}, - {0x2ca6, 0x2ca7}, - {0x2ca7, 0x2ca6}, - {0x2ca8, 0x2ca9}, - {0x2ca9, 0x2ca8}, - {0x2caa, 0x2cab}, - {0x2cab, 0x2caa}, - {0x2cac, 0x2cad}, - {0x2cad, 0x2cac}, - {0x2cae, 0x2caf}, - {0x2caf, 0x2cae}, - {0x2cb0, 0x2cb1}, - {0x2cb1, 0x2cb0}, - {0x2cb2, 0x2cb3}, - {0x2cb3, 0x2cb2}, - {0x2cb4, 0x2cb5}, - {0x2cb5, 0x2cb4}, - {0x2cb6, 0x2cb7}, - {0x2cb7, 0x2cb6}, - {0x2cb8, 0x2cb9}, - {0x2cb9, 0x2cb8}, - {0x2cba, 0x2cbb}, - {0x2cbb, 0x2cba}, - {0x2cbc, 0x2cbd}, - {0x2cbd, 0x2cbc}, - {0x2cbe, 0x2cbf}, - {0x2cbf, 0x2cbe}, - {0x2cc0, 0x2cc1}, - {0x2cc1, 0x2cc0}, - {0x2cc2, 0x2cc3}, - {0x2cc3, 0x2cc2}, - {0x2cc4, 0x2cc5}, - {0x2cc5, 0x2cc4}, - {0x2cc6, 0x2cc7}, - {0x2cc7, 0x2cc6}, - {0x2cc8, 0x2cc9}, - {0x2cc9, 0x2cc8}, - {0x2cca, 0x2ccb}, - {0x2ccb, 0x2cca}, - {0x2ccc, 0x2ccd}, - {0x2ccd, 0x2ccc}, - {0x2cce, 0x2ccf}, - {0x2ccf, 0x2cce}, - {0x2cd0, 0x2cd1}, - {0x2cd1, 0x2cd0}, - {0x2cd2, 0x2cd3}, - {0x2cd3, 0x2cd2}, - {0x2cd4, 0x2cd5}, - {0x2cd5, 0x2cd4}, - {0x2cd6, 0x2cd7}, - {0x2cd7, 0x2cd6}, - {0x2cd8, 0x2cd9}, - {0x2cd9, 0x2cd8}, - {0x2cda, 0x2cdb}, - {0x2cdb, 0x2cda}, - {0x2cdc, 0x2cdd}, - {0x2cdd, 0x2cdc}, - {0x2cde, 0x2cdf}, - {0x2cdf, 0x2cde}, - {0x2ce0, 0x2ce1}, - {0x2ce1, 0x2ce0}, - {0x2ce2, 0x2ce3}, - {0x2ce3, 0x2ce2}, - {0x2ceb, 0x2cec}, - {0x2cec, 0x2ceb}, - {0x2ced, 0x2cee}, - {0x2cee, 0x2ced}, - {0x2cf2, 0x2cf3}, - {0x2cf3, 0x2cf2}, - {0x2d00, 0x10a0}, - {0x2d01, 0x10a1}, - {0x2d02, 0x10a2}, - {0x2d03, 0x10a3}, - {0x2d04, 0x10a4}, - {0x2d05, 0x10a5}, - {0x2d06, 0x10a6}, - {0x2d07, 0x10a7}, - {0x2d08, 0x10a8}, - {0x2d09, 0x10a9}, - {0x2d0a, 0x10aa}, - {0x2d0b, 0x10ab}, - {0x2d0c, 0x10ac}, - {0x2d0d, 0x10ad}, - {0x2d0e, 0x10ae}, - {0x2d0f, 0x10af}, - {0x2d10, 0x10b0}, - {0x2d11, 0x10b1}, - {0x2d12, 0x10b2}, - {0x2d13, 0x10b3}, - {0x2d14, 0x10b4}, - {0x2d15, 0x10b5}, - {0x2d16, 0x10b6}, - {0x2d17, 0x10b7}, - {0x2d18, 0x10b8}, - {0x2d19, 0x10b9}, - {0x2d1a, 0x10ba}, - {0x2d1b, 0x10bb}, - {0x2d1c, 0x10bc}, - {0x2d1d, 0x10bd}, - {0x2d1e, 0x10be}, - {0x2d1f, 0x10bf}, - {0x2d20, 0x10c0}, - {0x2d21, 0x10c1}, - {0x2d22, 0x10c2}, - {0x2d23, 0x10c3}, - {0x2d24, 0x10c4}, - {0x2d25, 0x10c5}, - {0x2d27, 0x10c7}, - {0x2d2d, 0x10cd}, - {0xa640, 0xa641}, - {0xa641, 0xa640}, - {0xa642, 0xa643}, - {0xa643, 0xa642}, - {0xa644, 0xa645}, - {0xa645, 0xa644}, - {0xa646, 0xa647}, - {0xa647, 0xa646}, - {0xa648, 0xa649}, - {0xa649, 0xa648}, - {0xa64a, 0xa64b}, - {0xa64b, 0xa64a}, - {0xa64c, 0xa64d}, - {0xa64d, 0xa64c}, - {0xa64e, 0xa64f}, - {0xa64f, 0xa64e}, - {0xa650, 0xa651}, - {0xa651, 0xa650}, - {0xa652, 0xa653}, - {0xa653, 0xa652}, - {0xa654, 0xa655}, - {0xa655, 0xa654}, - {0xa656, 0xa657}, - {0xa657, 0xa656}, - {0xa658, 0xa659}, - {0xa659, 0xa658}, - {0xa65a, 0xa65b}, - {0xa65b, 0xa65a}, - {0xa65c, 0xa65d}, - {0xa65d, 0xa65c}, - {0xa65e, 0xa65f}, - {0xa65f, 0xa65e}, - {0xa660, 0xa661}, - {0xa661, 0xa660}, - {0xa662, 0xa663}, - {0xa663, 0xa662}, - {0xa664, 0xa665}, - {0xa665, 0xa664}, - {0xa666, 0xa667}, - {0xa667, 0xa666}, - {0xa668, 0xa669}, - {0xa669, 0xa668}, - {0xa66a, 0xa66b}, - {0xa66b, 0xa66a}, - {0xa66c, 0xa66d}, - {0xa66d, 0xa66c}, - {0xa680, 0xa681}, - {0xa681, 0xa680}, - {0xa682, 0xa683}, - {0xa683, 0xa682}, - {0xa684, 0xa685}, - {0xa685, 0xa684}, - {0xa686, 0xa687}, - {0xa687, 0xa686}, - {0xa688, 0xa689}, - {0xa689, 0xa688}, - {0xa68a, 0xa68b}, - {0xa68b, 0xa68a}, - {0xa68c, 0xa68d}, - {0xa68d, 0xa68c}, - {0xa68e, 0xa68f}, - {0xa68f, 0xa68e}, - {0xa690, 0xa691}, - {0xa691, 0xa690}, - {0xa692, 0xa693}, - {0xa693, 0xa692}, - {0xa694, 0xa695}, - {0xa695, 0xa694}, - {0xa696, 0xa697}, - {0xa697, 0xa696}, - {0xa698, 0xa699}, - {0xa699, 0xa698}, - {0xa69a, 0xa69b}, - {0xa69b, 0xa69a}, - {0xa722, 0xa723}, - {0xa723, 0xa722}, - {0xa724, 0xa725}, - {0xa725, 0xa724}, - {0xa726, 0xa727}, - {0xa727, 0xa726}, - {0xa728, 0xa729}, - {0xa729, 0xa728}, - {0xa72a, 0xa72b}, - {0xa72b, 0xa72a}, - {0xa72c, 0xa72d}, - {0xa72d, 0xa72c}, - {0xa72e, 0xa72f}, - {0xa72f, 0xa72e}, - {0xa732, 0xa733}, - {0xa733, 0xa732}, - {0xa734, 0xa735}, - {0xa735, 0xa734}, - {0xa736, 0xa737}, - {0xa737, 0xa736}, - {0xa738, 0xa739}, - {0xa739, 0xa738}, - {0xa73a, 0xa73b}, - {0xa73b, 0xa73a}, - {0xa73c, 0xa73d}, - {0xa73d, 0xa73c}, - {0xa73e, 0xa73f}, - {0xa73f, 0xa73e}, - {0xa740, 0xa741}, - {0xa741, 0xa740}, - {0xa742, 0xa743}, - {0xa743, 0xa742}, - {0xa744, 0xa745}, - {0xa745, 0xa744}, - {0xa746, 0xa747}, - {0xa747, 0xa746}, - {0xa748, 0xa749}, - {0xa749, 0xa748}, - {0xa74a, 0xa74b}, - {0xa74b, 0xa74a}, - {0xa74c, 0xa74d}, - {0xa74d, 0xa74c}, - {0xa74e, 0xa74f}, - {0xa74f, 0xa74e}, - {0xa750, 0xa751}, - {0xa751, 0xa750}, - {0xa752, 0xa753}, - {0xa753, 0xa752}, - {0xa754, 0xa755}, - {0xa755, 0xa754}, - {0xa756, 0xa757}, - {0xa757, 0xa756}, - {0xa758, 0xa759}, - {0xa759, 0xa758}, - {0xa75a, 0xa75b}, - {0xa75b, 0xa75a}, - {0xa75c, 0xa75d}, - {0xa75d, 0xa75c}, - {0xa75e, 0xa75f}, - {0xa75f, 0xa75e}, - {0xa760, 0xa761}, - {0xa761, 0xa760}, - {0xa762, 0xa763}, - {0xa763, 0xa762}, - {0xa764, 0xa765}, - {0xa765, 0xa764}, - {0xa766, 0xa767}, - {0xa767, 0xa766}, - {0xa768, 0xa769}, - {0xa769, 0xa768}, - {0xa76a, 0xa76b}, - {0xa76b, 0xa76a}, - {0xa76c, 0xa76d}, - {0xa76d, 0xa76c}, - {0xa76e, 0xa76f}, - {0xa76f, 0xa76e}, - {0xa779, 0xa77a}, - {0xa77a, 0xa779}, - {0xa77b, 0xa77c}, - {0xa77c, 0xa77b}, - {0xa77d, 0x1d79}, - {0xa77e, 0xa77f}, - {0xa77f, 0xa77e}, - {0xa780, 0xa781}, - {0xa781, 0xa780}, - {0xa782, 0xa783}, - {0xa783, 0xa782}, - {0xa784, 0xa785}, - {0xa785, 0xa784}, - {0xa786, 0xa787}, - {0xa787, 0xa786}, - {0xa78b, 0xa78c}, - {0xa78c, 0xa78b}, - {0xa78d, 0x265}, - {0xa790, 0xa791}, - {0xa791, 0xa790}, - {0xa792, 0xa793}, - {0xa793, 0xa792}, - {0xa796, 0xa797}, - {0xa797, 0xa796}, - {0xa798, 0xa799}, - {0xa799, 0xa798}, - {0xa79a, 0xa79b}, - {0xa79b, 0xa79a}, - {0xa79c, 0xa79d}, - {0xa79d, 0xa79c}, - {0xa79e, 0xa79f}, - {0xa79f, 0xa79e}, - {0xa7a0, 0xa7a1}, - {0xa7a1, 0xa7a0}, - {0xa7a2, 0xa7a3}, - {0xa7a3, 0xa7a2}, - {0xa7a4, 0xa7a5}, - {0xa7a5, 0xa7a4}, - {0xa7a6, 0xa7a7}, - {0xa7a7, 0xa7a6}, - {0xa7a8, 0xa7a9}, - {0xa7a9, 0xa7a8}, - {0xa7aa, 0x266}, - {0xa7ab, 0x25c}, - {0xa7ac, 0x261}, - {0xa7ad, 0x26c}, - {0xa7b0, 0x29e}, - {0xa7b1, 0x287}, - {0xff21, 0xff41}, - {0xff22, 0xff42}, - {0xff23, 0xff43}, - {0xff24, 0xff44}, - {0xff25, 0xff45}, - {0xff26, 0xff46}, - {0xff27, 0xff47}, - {0xff28, 0xff48}, - {0xff29, 0xff49}, - {0xff2a, 0xff4a}, - {0xff2b, 0xff4b}, - {0xff2c, 0xff4c}, - {0xff2d, 0xff4d}, - {0xff2e, 0xff4e}, - {0xff2f, 0xff4f}, - {0xff30, 0xff50}, - {0xff31, 0xff51}, - {0xff32, 0xff52}, - {0xff33, 0xff53}, - {0xff34, 0xff54}, - {0xff35, 0xff55}, - {0xff36, 0xff56}, - {0xff37, 0xff57}, - {0xff38, 0xff58}, - {0xff39, 0xff59}, - {0xff3a, 0xff5a}, - {0xff41, 0xff21}, - {0xff42, 0xff22}, - {0xff43, 0xff23}, - {0xff44, 0xff24}, - {0xff45, 0xff25}, - {0xff46, 0xff26}, - {0xff47, 0xff27}, - {0xff48, 0xff28}, - {0xff49, 0xff29}, - {0xff4a, 0xff2a}, - {0xff4b, 0xff2b}, - {0xff4c, 0xff2c}, - {0xff4d, 0xff2d}, - {0xff4e, 0xff2e}, - {0xff4f, 0xff2f}, - {0xff50, 0xff30}, - {0xff51, 0xff31}, - {0xff52, 0xff32}, - {0xff53, 0xff33}, - {0xff54, 0xff34}, - {0xff55, 0xff35}, - {0xff56, 0xff36}, - {0xff57, 0xff37}, - {0xff58, 0xff38}, - {0xff59, 0xff39}, - {0xff5a, 0xff3a}, - {0x10400, 0x10428}, - {0x10401, 0x10429}, - {0x10402, 0x1042a}, - {0x10403, 0x1042b}, - {0x10404, 0x1042c}, - {0x10405, 0x1042d}, - {0x10406, 0x1042e}, - {0x10407, 0x1042f}, - {0x10408, 0x10430}, - {0x10409, 0x10431}, - {0x1040a, 0x10432}, - {0x1040b, 0x10433}, - {0x1040c, 0x10434}, - {0x1040d, 0x10435}, - {0x1040e, 0x10436}, - {0x1040f, 0x10437}, - {0x10410, 0x10438}, - {0x10411, 0x10439}, - {0x10412, 0x1043a}, - {0x10413, 0x1043b}, - {0x10414, 0x1043c}, - {0x10415, 0x1043d}, - {0x10416, 0x1043e}, - {0x10417, 0x1043f}, - {0x10418, 0x10440}, - {0x10419, 0x10441}, - {0x1041a, 0x10442}, - {0x1041b, 0x10443}, - {0x1041c, 0x10444}, - {0x1041d, 0x10445}, - {0x1041e, 0x10446}, - {0x1041f, 0x10447}, - {0x10420, 0x10448}, - {0x10421, 0x10449}, - {0x10422, 0x1044a}, - {0x10423, 0x1044b}, - {0x10424, 0x1044c}, - {0x10425, 0x1044d}, - {0x10426, 0x1044e}, - {0x10427, 0x1044f}, - {0x10428, 0x10400}, - {0x10429, 0x10401}, - {0x1042a, 0x10402}, - {0x1042b, 0x10403}, - {0x1042c, 0x10404}, - {0x1042d, 0x10405}, - {0x1042e, 0x10406}, - {0x1042f, 0x10407}, - {0x10430, 0x10408}, - {0x10431, 0x10409}, - {0x10432, 0x1040a}, - {0x10433, 0x1040b}, - {0x10434, 0x1040c}, - {0x10435, 0x1040d}, - {0x10436, 0x1040e}, - {0x10437, 0x1040f}, - {0x10438, 0x10410}, - {0x10439, 0x10411}, - {0x1043a, 0x10412}, - {0x1043b, 0x10413}, - {0x1043c, 0x10414}, - {0x1043d, 0x10415}, - {0x1043e, 0x10416}, - {0x1043f, 0x10417}, - {0x10440, 0x10418}, - {0x10441, 0x10419}, - {0x10442, 0x1041a}, - {0x10443, 0x1041b}, - {0x10444, 0x1041c}, - {0x10445, 0x1041d}, - {0x10446, 0x1041e}, - {0x10447, 0x1041f}, - {0x10448, 0x10420}, - {0x10449, 0x10421}, - {0x1044a, 0x10422}, - {0x1044b, 0x10423}, - {0x1044c, 0x10424}, - {0x1044d, 0x10425}, - {0x1044e, 0x10426}, - {0x1044f, 0x10427}, - {0x118a0, 0x118c0}, - {0x118a1, 0x118c1}, - {0x118a2, 0x118c2}, - {0x118a3, 0x118c3}, - {0x118a4, 0x118c4}, - {0x118a5, 0x118c5}, - {0x118a6, 0x118c6}, - {0x118a7, 0x118c7}, - {0x118a8, 0x118c8}, - {0x118a9, 0x118c9}, - {0x118aa, 0x118ca}, - {0x118ab, 0x118cb}, - {0x118ac, 0x118cc}, - {0x118ad, 0x118cd}, - {0x118ae, 0x118ce}, - {0x118af, 0x118cf}, - {0x118b0, 0x118d0}, - {0x118b1, 0x118d1}, - {0x118b2, 0x118d2}, - {0x118b3, 0x118d3}, - {0x118b4, 0x118d4}, - {0x118b5, 0x118d5}, - {0x118b6, 0x118d6}, - {0x118b7, 0x118d7}, - {0x118b8, 0x118d8}, - {0x118b9, 0x118d9}, - {0x118ba, 0x118da}, - {0x118bb, 0x118db}, - {0x118bc, 0x118dc}, - {0x118bd, 0x118dd}, - {0x118be, 0x118de}, - {0x118bf, 0x118df}, - {0x118c0, 0x118a0}, - {0x118c1, 0x118a1}, - {0x118c2, 0x118a2}, - {0x118c3, 0x118a3}, - {0x118c4, 0x118a4}, - {0x118c5, 0x118a5}, - {0x118c6, 0x118a6}, - {0x118c7, 0x118a7}, - {0x118c8, 0x118a8}, - {0x118c9, 0x118a9}, - {0x118ca, 0x118aa}, - {0x118cb, 0x118ab}, - {0x118cc, 0x118ac}, - {0x118cd, 0x118ad}, - {0x118ce, 0x118ae}, - {0x118cf, 0x118af}, - {0x118d0, 0x118b0}, - {0x118d1, 0x118b1}, - {0x118d2, 0x118b2}, - {0x118d3, 0x118b3}, - {0x118d4, 0x118b4}, - {0x118d5, 0x118b5}, - {0x118d6, 0x118b6}, - {0x118d7, 0x118b7}, - {0x118d8, 0x118b8}, - {0x118d9, 0x118b9}, - {0x118da, 0x118ba}, - {0x118db, 0x118bb}, - {0x118dc, 0x118bc}, - {0x118dd, 0x118bd}, - {0x118de, 0x118be}, - {0x118df, 0x118bf}, -}; - -#endif // UCP_TABLE_DEFINE_FN - -} // namespace ue2 - -#endif - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UCP_TABLE_H +#define UCP_TABLE_H + +/* Generated by tools/scripts/ucp.py based on unicode database + * + * Do not hand edit + */ + +namespace ue2 { + +class CodePointSet; +void make_caseless(CodePointSet *cps); +bool flip_case(unichar *c); + +CodePointSet getUcpC(void); +CodePointSet getUcpCc(void); +CodePointSet getUcpCf(void); +CodePointSet getUcpCn(void); +CodePointSet getUcpCo(void); +CodePointSet getUcpCs(void); +CodePointSet getUcpL(void); +CodePointSet getUcpL_and(void); +CodePointSet getUcpLl(void); +CodePointSet getUcpLm(void); +CodePointSet getUcpLo(void); +CodePointSet getUcpLt(void); +CodePointSet getUcpLu(void); +CodePointSet getUcpM(void); +CodePointSet getUcpMc(void); +CodePointSet getUcpMe(void); +CodePointSet getUcpMn(void); +CodePointSet getUcpN(void); +CodePointSet getUcpNd(void); +CodePointSet getUcpNl(void); +CodePointSet getUcpNo(void); +CodePointSet getUcpP(void); +CodePointSet getUcpPc(void); +CodePointSet getUcpPd(void); +CodePointSet getUcpPe(void); +CodePointSet getUcpPf(void); +CodePointSet getUcpPi(void); +CodePointSet getUcpPo(void); +CodePointSet getUcpPs(void); +CodePointSet getUcpS(void); +CodePointSet getUcpSc(void); +CodePointSet getUcpSk(void); +CodePointSet getUcpSm(void); +CodePointSet getUcpSo(void); +CodePointSet getUcpXan(void); +CodePointSet getUcpXps(void); +CodePointSet getUcpXsp(void); +CodePointSet getUcpXwd(void); +CodePointSet getUcpZ(void); +CodePointSet getUcpZl(void); +CodePointSet getUcpZp(void); +CodePointSet getUcpZs(void); +CodePointSet getUcpArabic(void); +CodePointSet getUcpArmenian(void); +CodePointSet getUcpAvestan(void); +CodePointSet getUcpBalinese(void); +CodePointSet getUcpBamum(void); +CodePointSet getUcpBassa_Vah(void); +CodePointSet getUcpBatak(void); +CodePointSet getUcpBengali(void); +CodePointSet getUcpBopomofo(void); +CodePointSet getUcpBrahmi(void); +CodePointSet getUcpBraille(void); +CodePointSet getUcpBuginese(void); +CodePointSet getUcpBuhid(void); +CodePointSet getUcpCanadian_Aboriginal(void); +CodePointSet getUcpCarian(void); +CodePointSet getUcpCaucasian_Albanian(void); +CodePointSet getUcpChakma(void); +CodePointSet getUcpCham(void); +CodePointSet getUcpCherokee(void); +CodePointSet getUcpCommon(void); +CodePointSet getUcpCoptic(void); +CodePointSet getUcpCuneiform(void); +CodePointSet getUcpCypriot(void); +CodePointSet getUcpCyrillic(void); +CodePointSet getUcpDeseret(void); +CodePointSet getUcpDevanagari(void); +CodePointSet getUcpDuployan(void); +CodePointSet getUcpEgyptian_Hieroglyphs(void); +CodePointSet getUcpElbasan(void); +CodePointSet getUcpEthiopic(void); +CodePointSet getUcpGeorgian(void); +CodePointSet getUcpGlagolitic(void); +CodePointSet getUcpGothic(void); +CodePointSet getUcpGrantha(void); +CodePointSet getUcpGreek(void); +CodePointSet getUcpGujarati(void); +CodePointSet getUcpGurmukhi(void); +CodePointSet getUcpHan(void); +CodePointSet getUcpHangul(void); +CodePointSet getUcpHanunoo(void); +CodePointSet getUcpHebrew(void); +CodePointSet getUcpHiragana(void); +CodePointSet getUcpImperial_Aramaic(void); +CodePointSet getUcpInherited(void); +CodePointSet getUcpInscriptional_Pahlavi(void); +CodePointSet getUcpInscriptional_Parthian(void); +CodePointSet getUcpJavanese(void); +CodePointSet getUcpKaithi(void); +CodePointSet getUcpKannada(void); +CodePointSet getUcpKatakana(void); +CodePointSet getUcpKayah_Li(void); +CodePointSet getUcpKharoshthi(void); +CodePointSet getUcpKhmer(void); +CodePointSet getUcpKhojki(void); +CodePointSet getUcpKhudawadi(void); +CodePointSet getUcpLao(void); +CodePointSet getUcpLatin(void); +CodePointSet getUcpLepcha(void); +CodePointSet getUcpLimbu(void); +CodePointSet getUcpLinear_A(void); +CodePointSet getUcpLinear_B(void); +CodePointSet getUcpLisu(void); +CodePointSet getUcpLycian(void); +CodePointSet getUcpLydian(void); +CodePointSet getUcpMahajani(void); +CodePointSet getUcpMalayalam(void); +CodePointSet getUcpMandaic(void); +CodePointSet getUcpManichaean(void); +CodePointSet getUcpMeetei_Mayek(void); +CodePointSet getUcpMende_Kikakui(void); +CodePointSet getUcpMeroitic_Cursive(void); +CodePointSet getUcpMeroitic_Hieroglyphs(void); +CodePointSet getUcpMiao(void); +CodePointSet getUcpModi(void); +CodePointSet getUcpMongolian(void); +CodePointSet getUcpMro(void); +CodePointSet getUcpMyanmar(void); +CodePointSet getUcpNabataean(void); +CodePointSet getUcpNew_Tai_Lue(void); +CodePointSet getUcpNko(void); +CodePointSet getUcpOgham(void); +CodePointSet getUcpOl_Chiki(void); +CodePointSet getUcpOld_Italic(void); +CodePointSet getUcpOld_North_Arabian(void); +CodePointSet getUcpOld_Permic(void); +CodePointSet getUcpOld_Persian(void); +CodePointSet getUcpOld_South_Arabian(void); +CodePointSet getUcpOld_Turkic(void); +CodePointSet getUcpOriya(void); +CodePointSet getUcpOsmanya(void); +CodePointSet getUcpPahawh_Hmong(void); +CodePointSet getUcpPalmyrene(void); +CodePointSet getUcpPau_Cin_Hau(void); +CodePointSet getUcpPhags_Pa(void); +CodePointSet getUcpPhoenician(void); +CodePointSet getUcpPsalter_Pahlavi(void); +CodePointSet getUcpRejang(void); +CodePointSet getUcpRunic(void); +CodePointSet getUcpSamaritan(void); +CodePointSet getUcpSaurashtra(void); +CodePointSet getUcpSharada(void); +CodePointSet getUcpShavian(void); +CodePointSet getUcpSiddham(void); +CodePointSet getUcpSinhala(void); +CodePointSet getUcpSora_Sompeng(void); +CodePointSet getUcpSundanese(void); +CodePointSet getUcpSyloti_Nagri(void); +CodePointSet getUcpSyriac(void); +CodePointSet getUcpTagalog(void); +CodePointSet getUcpTagbanwa(void); +CodePointSet getUcpTai_Le(void); +CodePointSet getUcpTai_Tham(void); +CodePointSet getUcpTai_Viet(void); +CodePointSet getUcpTakri(void); +CodePointSet getUcpTamil(void); +CodePointSet getUcpTelugu(void); +CodePointSet getUcpThaana(void); +CodePointSet getUcpThai(void); +CodePointSet getUcpTibetan(void); +CodePointSet getUcpTifinagh(void); +CodePointSet getUcpTirhuta(void); +CodePointSet getUcpUgaritic(void); +CodePointSet getUcpVai(void); +CodePointSet getUcpWarang_Citi(void); +CodePointSet getUcpYi(void); + +#ifdef UCP_TABLE_DEFINE_FN + +static const unichar ucp_C_def[] = { + 0x0, 0x1f, + 0x7f, 0x9f, + 0xad, 0xad, + 0x378, 0x379, + 0x380, 0x383, + 0x38b, 0x38b, + 0x38d, 0x38d, + 0x3a2, 0x3a2, + 0x530, 0x530, + 0x557, 0x558, + 0x560, 0x560, + 0x588, 0x588, + 0x58b, 0x58c, + 0x590, 0x590, + 0x5c8, 0x5cf, + 0x5eb, 0x5ef, + 0x5f5, 0x605, + 0x61c, 0x61d, + 0x6dd, 0x6dd, + 0x70e, 0x70f, + 0x74b, 0x74c, + 0x7b2, 0x7bf, + 0x7fb, 0x7ff, + 0x82e, 0x82f, + 0x83f, 0x83f, + 0x85c, 0x85d, + 0x85f, 0x89f, + 0x8b3, 0x8e3, + 0x984, 0x984, + 0x98d, 0x98e, + 0x991, 0x992, + 0x9a9, 0x9a9, + 0x9b1, 0x9b1, + 0x9b3, 0x9b5, + 0x9ba, 0x9bb, + 0x9c5, 0x9c6, + 0x9c9, 0x9ca, + 0x9cf, 0x9d6, + 0x9d8, 0x9db, + 0x9de, 0x9de, + 0x9e4, 0x9e5, + 0x9fc, 0xa00, + 0xa04, 0xa04, + 0xa0b, 0xa0e, + 0xa11, 0xa12, + 0xa29, 0xa29, + 0xa31, 0xa31, + 0xa34, 0xa34, + 0xa37, 0xa37, + 0xa3a, 0xa3b, + 0xa3d, 0xa3d, + 0xa43, 0xa46, + 0xa49, 0xa4a, + 0xa4e, 0xa50, + 0xa52, 0xa58, + 0xa5d, 0xa5d, + 0xa5f, 0xa65, + 0xa76, 0xa80, + 0xa84, 0xa84, + 0xa8e, 0xa8e, + 0xa92, 0xa92, + 0xaa9, 0xaa9, + 0xab1, 0xab1, + 0xab4, 0xab4, + 0xaba, 0xabb, + 0xac6, 0xac6, + 0xaca, 0xaca, + 0xace, 0xacf, + 0xad1, 0xadf, + 0xae4, 0xae5, + 0xaf2, 0xb00, + 0xb04, 0xb04, + 0xb0d, 0xb0e, + 0xb11, 0xb12, + 0xb29, 0xb29, + 0xb31, 0xb31, + 0xb34, 0xb34, + 0xb3a, 0xb3b, + 0xb45, 0xb46, + 0xb49, 0xb4a, + 0xb4e, 0xb55, + 0xb58, 0xb5b, + 0xb5e, 0xb5e, + 0xb64, 0xb65, + 0xb78, 0xb81, + 0xb84, 0xb84, + 0xb8b, 0xb8d, + 0xb91, 0xb91, + 0xb96, 0xb98, + 0xb9b, 0xb9b, + 0xb9d, 0xb9d, + 0xba0, 0xba2, + 0xba5, 0xba7, + 0xbab, 0xbad, + 0xbba, 0xbbd, + 0xbc3, 0xbc5, + 0xbc9, 0xbc9, + 0xbce, 0xbcf, + 0xbd1, 0xbd6, + 0xbd8, 0xbe5, + 0xbfb, 0xbff, + 0xc04, 0xc04, + 0xc0d, 0xc0d, + 0xc11, 0xc11, + 0xc29, 0xc29, + 0xc3a, 0xc3c, + 0xc45, 0xc45, + 0xc49, 0xc49, + 0xc4e, 0xc54, + 0xc57, 0xc57, + 0xc5a, 0xc5f, + 0xc64, 0xc65, + 0xc70, 0xc77, + 0xc80, 0xc80, + 0xc84, 0xc84, + 0xc8d, 0xc8d, + 0xc91, 0xc91, + 0xca9, 0xca9, + 0xcb4, 0xcb4, + 0xcba, 0xcbb, + 0xcc5, 0xcc5, + 0xcc9, 0xcc9, + 0xcce, 0xcd4, + 0xcd7, 0xcdd, + 0xcdf, 0xcdf, + 0xce4, 0xce5, + 0xcf0, 0xcf0, + 0xcf3, 0xd00, + 0xd04, 0xd04, + 0xd0d, 0xd0d, + 0xd11, 0xd11, + 0xd3b, 0xd3c, + 0xd45, 0xd45, + 0xd49, 0xd49, + 0xd4f, 0xd56, + 0xd58, 0xd5f, + 0xd64, 0xd65, + 0xd76, 0xd78, + 0xd80, 0xd81, + 0xd84, 0xd84, + 0xd97, 0xd99, + 0xdb2, 0xdb2, + 0xdbc, 0xdbc, + 0xdbe, 0xdbf, + 0xdc7, 0xdc9, + 0xdcb, 0xdce, + 0xdd5, 0xdd5, + 0xdd7, 0xdd7, + 0xde0, 0xde5, + 0xdf0, 0xdf1, + 0xdf5, 0xe00, + 0xe3b, 0xe3e, + 0xe5c, 0xe80, + 0xe83, 0xe83, + 0xe85, 0xe86, + 0xe89, 0xe89, + 0xe8b, 0xe8c, + 0xe8e, 0xe93, + 0xe98, 0xe98, + 0xea0, 0xea0, + 0xea4, 0xea4, + 0xea6, 0xea6, + 0xea8, 0xea9, + 0xeac, 0xeac, + 0xeba, 0xeba, + 0xebe, 0xebf, + 0xec5, 0xec5, + 0xec7, 0xec7, + 0xece, 0xecf, + 0xeda, 0xedb, + 0xee0, 0xeff, + 0xf48, 0xf48, + 0xf6d, 0xf70, + 0xf98, 0xf98, + 0xfbd, 0xfbd, + 0xfcd, 0xfcd, + 0xfdb, 0xfff, + 0x10c6, 0x10c6, + 0x10c8, 0x10cc, + 0x10ce, 0x10cf, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135c, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x13ff, + 0x169d, 0x169f, + 0x16f9, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180e, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18ab, 0x18af, + 0x18f6, 0x18ff, + 0x191f, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19ac, 0x19af, + 0x19ca, 0x19cf, + 0x19db, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a5f, 0x1a5f, + 0x1a7d, 0x1a7e, + 0x1a8a, 0x1a8f, + 0x1a9a, 0x1a9f, + 0x1aae, 0x1aaf, + 0x1abf, 0x1aff, + 0x1b4c, 0x1b4f, + 0x1b7d, 0x1b7f, + 0x1bf4, 0x1bfb, + 0x1c38, 0x1c3a, + 0x1c4a, 0x1c4c, + 0x1c80, 0x1cbf, + 0x1cc8, 0x1ccf, + 0x1cf7, 0x1cf7, + 0x1cfa, 0x1cff, + 0x1df6, 0x1dfb, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x206f, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x209d, 0x209f, + 0x20be, 0x20cf, + 0x20f1, 0x20ff, + 0x218a, 0x218f, + 0x23fb, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x2b74, 0x2b75, + 0x2b96, 0x2b97, + 0x2bba, 0x2bbc, + 0x2bc9, 0x2bc9, + 0x2bd2, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c5f, + 0x2cf4, 0x2cf8, + 0x2d26, 0x2d26, + 0x2d28, 0x2d2c, + 0x2d2e, 0x2d2f, + 0x2d68, 0x2d6e, + 0x2d71, 0x2d7e, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2ddf, + 0x2e43, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312e, 0x3130, + 0x318f, 0x318f, + 0x31bb, 0x31bf, + 0x31e4, 0x31ef, + 0x321f, 0x321f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fcd, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa4cf, + 0xa62c, 0xa63f, + 0xa69e, 0xa69e, + 0xa6f8, 0xa6ff, + 0xa78f, 0xa78f, + 0xa7ae, 0xa7af, + 0xa7b2, 0xa7f6, + 0xa82c, 0xa82f, + 0xa83a, 0xa83f, + 0xa878, 0xa87f, + 0xa8c5, 0xa8cd, + 0xa8da, 0xa8df, + 0xa8fc, 0xa8ff, + 0xa954, 0xa95e, + 0xa97d, 0xa97f, + 0xa9ce, 0xa9ce, + 0xa9da, 0xa9dd, + 0xa9ff, 0xa9ff, + 0xaa37, 0xaa3f, + 0xaa4e, 0xaa4f, + 0xaa5a, 0xaa5b, + 0xaac3, 0xaada, + 0xaaf7, 0xab00, + 0xab07, 0xab08, + 0xab0f, 0xab10, + 0xab17, 0xab1f, + 0xab27, 0xab27, + 0xab2f, 0xab2f, + 0xab60, 0xab63, + 0xab66, 0xabbf, + 0xabee, 0xabef, + 0xabfa, 0xabff, + 0xd7a4, 0xd7af, + 0xd7c7, 0xd7ca, + 0xd7fc, 0xf8ff, + 0xfa6e, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbc2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe2e, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfffb, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018d, 0x1018f, + 0x1019c, 0x1019f, + 0x101a1, 0x101cf, + 0x101fe, 0x1027f, + 0x1029d, 0x1029f, + 0x102d1, 0x102df, + 0x102fc, 0x102ff, + 0x10324, 0x1032f, + 0x1034b, 0x1034f, + 0x1037b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x104ff, + 0x10528, 0x1052f, + 0x10564, 0x1056e, + 0x10570, 0x105ff, + 0x10737, 0x1073f, + 0x10756, 0x1075f, + 0x10768, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10856, 0x10856, + 0x1089f, 0x108a6, + 0x108b0, 0x108ff, + 0x1091c, 0x1091e, + 0x1093a, 0x1093e, + 0x10940, 0x1097f, + 0x109b8, 0x109bd, + 0x109c0, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x10a5f, + 0x10aa0, 0x10abf, + 0x10ae7, 0x10aea, + 0x10af7, 0x10aff, + 0x10b36, 0x10b38, + 0x10b56, 0x10b57, + 0x10b73, 0x10b77, + 0x10b92, 0x10b98, + 0x10b9d, 0x10ba8, + 0x10bb0, 0x10bff, + 0x10c49, 0x10e5f, + 0x10e7f, 0x10fff, + 0x1104e, 0x11051, + 0x11070, 0x1107e, + 0x110bd, 0x110bd, + 0x110c2, 0x110cf, + 0x110e9, 0x110ef, + 0x110fa, 0x110ff, + 0x11135, 0x11135, + 0x11144, 0x1114f, + 0x11177, 0x1117f, + 0x111c9, 0x111cc, + 0x111ce, 0x111cf, + 0x111db, 0x111e0, + 0x111f5, 0x111ff, + 0x11212, 0x11212, + 0x1123e, 0x112af, + 0x112eb, 0x112ef, + 0x112fa, 0x11300, + 0x11304, 0x11304, + 0x1130d, 0x1130e, + 0x11311, 0x11312, + 0x11329, 0x11329, + 0x11331, 0x11331, + 0x11334, 0x11334, + 0x1133a, 0x1133b, + 0x11345, 0x11346, + 0x11349, 0x1134a, + 0x1134e, 0x11356, + 0x11358, 0x1135c, + 0x11364, 0x11365, + 0x1136d, 0x1136f, + 0x11375, 0x1147f, + 0x114c8, 0x114cf, + 0x114da, 0x1157f, + 0x115b6, 0x115b7, + 0x115ca, 0x115ff, + 0x11645, 0x1164f, + 0x1165a, 0x1167f, + 0x116b8, 0x116bf, + 0x116ca, 0x1189f, + 0x118f3, 0x118fe, + 0x11900, 0x11abf, + 0x11af9, 0x11fff, + 0x12399, 0x123ff, + 0x1246f, 0x1246f, + 0x12475, 0x12fff, + 0x1342f, 0x167ff, + 0x16a39, 0x16a3f, + 0x16a5f, 0x16a5f, + 0x16a6a, 0x16a6d, + 0x16a70, 0x16acf, + 0x16aee, 0x16aef, + 0x16af6, 0x16aff, + 0x16b46, 0x16b4f, + 0x16b5a, 0x16b5a, + 0x16b62, 0x16b62, + 0x16b78, 0x16b7c, + 0x16b90, 0x16eff, + 0x16f45, 0x16f4f, + 0x16f7f, 0x16f8e, + 0x16fa0, 0x1afff, + 0x1b002, 0x1bbff, + 0x1bc6b, 0x1bc6f, + 0x1bc7d, 0x1bc7f, + 0x1bc89, 0x1bc8f, + 0x1bc9a, 0x1bc9b, + 0x1bca0, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d128, + 0x1d173, 0x1d17a, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d35f, + 0x1d372, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7cc, 0x1d7cd, + 0x1d800, 0x1e7ff, + 0x1e8c5, 0x1e8c6, + 0x1e8d7, 0x1edff, + 0x1ee04, 0x1ee04, + 0x1ee20, 0x1ee20, + 0x1ee23, 0x1ee23, + 0x1ee25, 0x1ee26, + 0x1ee28, 0x1ee28, + 0x1ee33, 0x1ee33, + 0x1ee38, 0x1ee38, + 0x1ee3a, 0x1ee3a, + 0x1ee3c, 0x1ee41, + 0x1ee43, 0x1ee46, + 0x1ee48, 0x1ee48, + 0x1ee4a, 0x1ee4a, + 0x1ee4c, 0x1ee4c, + 0x1ee50, 0x1ee50, + 0x1ee53, 0x1ee53, + 0x1ee55, 0x1ee56, + 0x1ee58, 0x1ee58, + 0x1ee5a, 0x1ee5a, + 0x1ee5c, 0x1ee5c, + 0x1ee5e, 0x1ee5e, + 0x1ee60, 0x1ee60, + 0x1ee63, 0x1ee63, + 0x1ee65, 0x1ee66, + 0x1ee6b, 0x1ee6b, + 0x1ee73, 0x1ee73, + 0x1ee78, 0x1ee78, + 0x1ee7d, 0x1ee7d, + 0x1ee7f, 0x1ee7f, + 0x1ee8a, 0x1ee8a, + 0x1ee9c, 0x1eea0, + 0x1eea4, 0x1eea4, + 0x1eeaa, 0x1eeaa, + 0x1eebc, 0x1eeef, + 0x1eef2, 0x1efff, + 0x1f02c, 0x1f02f, + 0x1f094, 0x1f09f, + 0x1f0af, 0x1f0b0, + 0x1f0c0, 0x1f0c0, + 0x1f0d0, 0x1f0d0, + 0x1f0f6, 0x1f0ff, + 0x1f10d, 0x1f10f, + 0x1f12f, 0x1f12f, + 0x1f16c, 0x1f16f, + 0x1f19b, 0x1f1e5, + 0x1f203, 0x1f20f, + 0x1f23b, 0x1f23f, + 0x1f249, 0x1f24f, + 0x1f252, 0x1f2ff, + 0x1f32d, 0x1f32f, + 0x1f37e, 0x1f37f, + 0x1f3cf, 0x1f3d3, + 0x1f3f8, 0x1f3ff, + 0x1f4ff, 0x1f4ff, + 0x1f54b, 0x1f54f, + 0x1f57a, 0x1f57a, + 0x1f5a4, 0x1f5a4, + 0x1f643, 0x1f644, + 0x1f6d0, 0x1f6df, + 0x1f6ed, 0x1f6ef, + 0x1f6f4, 0x1f6ff, + 0x1f774, 0x1f77f, + 0x1f7d5, 0x1f7ff, + 0x1f80c, 0x1f80f, + 0x1f848, 0x1f84f, + 0x1f85a, 0x1f85f, + 0x1f888, 0x1f88f, + 0x1f8ae, 0x1ffff, + 0x2a6d7, 0x2a6ff, + 0x2b735, 0x2b73f, + 0x2b81e, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; +UCP_FN(C) + +static const unichar ucp_Cc_def[] = { + 0x0, 0x1f, + 0x7f, 0x9f +}; +UCP_FN(Cc) + +static const unichar ucp_Cf_def[] = { + 0xad, 0xad, + 0x600, 0x605, + 0x61c, 0x61c, + 0x6dd, 0x6dd, + 0x70f, 0x70f, + 0x180e, 0x180e, + 0x200b, 0x200f, + 0x202a, 0x202e, + 0x2060, 0x2064, + 0x2066, 0x206f, + 0xfeff, 0xfeff, + 0xfff9, 0xfffb, + 0x110bd, 0x110bd, + 0x1bca0, 0x1bca3, + 0x1d173, 0x1d17a, + 0xe0001, 0xe0001, + 0xe0020, 0xe007f +}; +UCP_FN(Cf) + +static const unichar ucp_Cn_def[] = { + 0x378, 0x379, + 0x380, 0x383, + 0x38b, 0x38b, + 0x38d, 0x38d, + 0x3a2, 0x3a2, + 0x530, 0x530, + 0x557, 0x558, + 0x560, 0x560, + 0x588, 0x588, + 0x58b, 0x58c, + 0x590, 0x590, + 0x5c8, 0x5cf, + 0x5eb, 0x5ef, + 0x5f5, 0x5ff, + 0x61d, 0x61d, + 0x70e, 0x70e, + 0x74b, 0x74c, + 0x7b2, 0x7bf, + 0x7fb, 0x7ff, + 0x82e, 0x82f, + 0x83f, 0x83f, + 0x85c, 0x85d, + 0x85f, 0x89f, + 0x8b3, 0x8e3, + 0x984, 0x984, + 0x98d, 0x98e, + 0x991, 0x992, + 0x9a9, 0x9a9, + 0x9b1, 0x9b1, + 0x9b3, 0x9b5, + 0x9ba, 0x9bb, + 0x9c5, 0x9c6, + 0x9c9, 0x9ca, + 0x9cf, 0x9d6, + 0x9d8, 0x9db, + 0x9de, 0x9de, + 0x9e4, 0x9e5, + 0x9fc, 0xa00, + 0xa04, 0xa04, + 0xa0b, 0xa0e, + 0xa11, 0xa12, + 0xa29, 0xa29, + 0xa31, 0xa31, + 0xa34, 0xa34, + 0xa37, 0xa37, + 0xa3a, 0xa3b, + 0xa3d, 0xa3d, + 0xa43, 0xa46, + 0xa49, 0xa4a, + 0xa4e, 0xa50, + 0xa52, 0xa58, + 0xa5d, 0xa5d, + 0xa5f, 0xa65, + 0xa76, 0xa80, + 0xa84, 0xa84, + 0xa8e, 0xa8e, + 0xa92, 0xa92, + 0xaa9, 0xaa9, + 0xab1, 0xab1, + 0xab4, 0xab4, + 0xaba, 0xabb, + 0xac6, 0xac6, + 0xaca, 0xaca, + 0xace, 0xacf, + 0xad1, 0xadf, + 0xae4, 0xae5, + 0xaf2, 0xb00, + 0xb04, 0xb04, + 0xb0d, 0xb0e, + 0xb11, 0xb12, + 0xb29, 0xb29, + 0xb31, 0xb31, + 0xb34, 0xb34, + 0xb3a, 0xb3b, + 0xb45, 0xb46, + 0xb49, 0xb4a, + 0xb4e, 0xb55, + 0xb58, 0xb5b, + 0xb5e, 0xb5e, + 0xb64, 0xb65, + 0xb78, 0xb81, + 0xb84, 0xb84, + 0xb8b, 0xb8d, + 0xb91, 0xb91, + 0xb96, 0xb98, + 0xb9b, 0xb9b, + 0xb9d, 0xb9d, + 0xba0, 0xba2, + 0xba5, 0xba7, + 0xbab, 0xbad, + 0xbba, 0xbbd, + 0xbc3, 0xbc5, + 0xbc9, 0xbc9, + 0xbce, 0xbcf, + 0xbd1, 0xbd6, + 0xbd8, 0xbe5, + 0xbfb, 0xbff, + 0xc04, 0xc04, + 0xc0d, 0xc0d, + 0xc11, 0xc11, + 0xc29, 0xc29, + 0xc3a, 0xc3c, + 0xc45, 0xc45, + 0xc49, 0xc49, + 0xc4e, 0xc54, + 0xc57, 0xc57, + 0xc5a, 0xc5f, + 0xc64, 0xc65, + 0xc70, 0xc77, + 0xc80, 0xc80, + 0xc84, 0xc84, + 0xc8d, 0xc8d, + 0xc91, 0xc91, + 0xca9, 0xca9, + 0xcb4, 0xcb4, + 0xcba, 0xcbb, + 0xcc5, 0xcc5, + 0xcc9, 0xcc9, + 0xcce, 0xcd4, + 0xcd7, 0xcdd, + 0xcdf, 0xcdf, + 0xce4, 0xce5, + 0xcf0, 0xcf0, + 0xcf3, 0xd00, + 0xd04, 0xd04, + 0xd0d, 0xd0d, + 0xd11, 0xd11, + 0xd3b, 0xd3c, + 0xd45, 0xd45, + 0xd49, 0xd49, + 0xd4f, 0xd56, + 0xd58, 0xd5f, + 0xd64, 0xd65, + 0xd76, 0xd78, + 0xd80, 0xd81, + 0xd84, 0xd84, + 0xd97, 0xd99, + 0xdb2, 0xdb2, + 0xdbc, 0xdbc, + 0xdbe, 0xdbf, + 0xdc7, 0xdc9, + 0xdcb, 0xdce, + 0xdd5, 0xdd5, + 0xdd7, 0xdd7, + 0xde0, 0xde5, + 0xdf0, 0xdf1, + 0xdf5, 0xe00, + 0xe3b, 0xe3e, + 0xe5c, 0xe80, + 0xe83, 0xe83, + 0xe85, 0xe86, + 0xe89, 0xe89, + 0xe8b, 0xe8c, + 0xe8e, 0xe93, + 0xe98, 0xe98, + 0xea0, 0xea0, + 0xea4, 0xea4, + 0xea6, 0xea6, + 0xea8, 0xea9, + 0xeac, 0xeac, + 0xeba, 0xeba, + 0xebe, 0xebf, + 0xec5, 0xec5, + 0xec7, 0xec7, + 0xece, 0xecf, + 0xeda, 0xedb, + 0xee0, 0xeff, + 0xf48, 0xf48, + 0xf6d, 0xf70, + 0xf98, 0xf98, + 0xfbd, 0xfbd, + 0xfcd, 0xfcd, + 0xfdb, 0xfff, + 0x10c6, 0x10c6, + 0x10c8, 0x10cc, + 0x10ce, 0x10cf, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135c, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x13ff, + 0x169d, 0x169f, + 0x16f9, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1737, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18ab, 0x18af, + 0x18f6, 0x18ff, + 0x191f, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19ac, 0x19af, + 0x19ca, 0x19cf, + 0x19db, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a5f, 0x1a5f, + 0x1a7d, 0x1a7e, + 0x1a8a, 0x1a8f, + 0x1a9a, 0x1a9f, + 0x1aae, 0x1aaf, + 0x1abf, 0x1aff, + 0x1b4c, 0x1b4f, + 0x1b7d, 0x1b7f, + 0x1bf4, 0x1bfb, + 0x1c38, 0x1c3a, + 0x1c4a, 0x1c4c, + 0x1c80, 0x1cbf, + 0x1cc8, 0x1ccf, + 0x1cf7, 0x1cf7, + 0x1cfa, 0x1cff, + 0x1df6, 0x1dfb, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x1fff, + 0x2065, 0x2065, + 0x2072, 0x2073, + 0x208f, 0x208f, + 0x209d, 0x209f, + 0x20be, 0x20cf, + 0x20f1, 0x20ff, + 0x218a, 0x218f, + 0x23fb, 0x23ff, + 0x2427, 0x243f, + 0x244b, 0x245f, + 0x2b74, 0x2b75, + 0x2b96, 0x2b97, + 0x2bba, 0x2bbc, + 0x2bc9, 0x2bc9, + 0x2bd2, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c5f, + 0x2cf4, 0x2cf8, + 0x2d26, 0x2d26, + 0x2d28, 0x2d2c, + 0x2d2e, 0x2d2f, + 0x2d68, 0x2d6e, + 0x2d71, 0x2d7e, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2ddf, + 0x2e43, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x2fef, + 0x2ffc, 0x2fff, + 0x3040, 0x3040, + 0x3097, 0x3098, + 0x3100, 0x3104, + 0x312e, 0x3130, + 0x318f, 0x318f, + 0x31bb, 0x31bf, + 0x31e4, 0x31ef, + 0x321f, 0x321f, + 0x32ff, 0x32ff, + 0x4db6, 0x4dbf, + 0x9fcd, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa4cf, + 0xa62c, 0xa63f, + 0xa69e, 0xa69e, + 0xa6f8, 0xa6ff, + 0xa78f, 0xa78f, + 0xa7ae, 0xa7af, + 0xa7b2, 0xa7f6, + 0xa82c, 0xa82f, + 0xa83a, 0xa83f, + 0xa878, 0xa87f, + 0xa8c5, 0xa8cd, + 0xa8da, 0xa8df, + 0xa8fc, 0xa8ff, + 0xa954, 0xa95e, + 0xa97d, 0xa97f, + 0xa9ce, 0xa9ce, + 0xa9da, 0xa9dd, + 0xa9ff, 0xa9ff, + 0xaa37, 0xaa3f, + 0xaa4e, 0xaa4f, + 0xaa5a, 0xaa5b, + 0xaac3, 0xaada, + 0xaaf7, 0xab00, + 0xab07, 0xab08, + 0xab0f, 0xab10, + 0xab17, 0xab1f, + 0xab27, 0xab27, + 0xab2f, 0xab2f, + 0xab60, 0xab63, + 0xab66, 0xabbf, + 0xabee, 0xabef, + 0xabfa, 0xabff, + 0xd7a4, 0xd7af, + 0xd7c7, 0xd7ca, + 0xd7fc, 0xd7ff, + 0xfa6e, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbc2, 0xfbd2, + 0xfd40, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe1a, 0xfe1f, + 0xfe2e, 0xfe2f, + 0xfe53, 0xfe53, + 0xfe67, 0xfe67, + 0xfe6c, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xfefe, + 0xff00, 0xff00, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffdf, + 0xffe7, 0xffe7, + 0xffef, 0xfff8, + 0xfffe, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x100ff, + 0x10103, 0x10106, + 0x10134, 0x10136, + 0x1018d, 0x1018f, + 0x1019c, 0x1019f, + 0x101a1, 0x101cf, + 0x101fe, 0x1027f, + 0x1029d, 0x1029f, + 0x102d1, 0x102df, + 0x102fc, 0x102ff, + 0x10324, 0x1032f, + 0x1034b, 0x1034f, + 0x1037b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x104ff, + 0x10528, 0x1052f, + 0x10564, 0x1056e, + 0x10570, 0x105ff, + 0x10737, 0x1073f, + 0x10756, 0x1075f, + 0x10768, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10856, 0x10856, + 0x1089f, 0x108a6, + 0x108b0, 0x108ff, + 0x1091c, 0x1091e, + 0x1093a, 0x1093e, + 0x10940, 0x1097f, + 0x109b8, 0x109bd, + 0x109c0, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x10a5f, + 0x10aa0, 0x10abf, + 0x10ae7, 0x10aea, + 0x10af7, 0x10aff, + 0x10b36, 0x10b38, + 0x10b56, 0x10b57, + 0x10b73, 0x10b77, + 0x10b92, 0x10b98, + 0x10b9d, 0x10ba8, + 0x10bb0, 0x10bff, + 0x10c49, 0x10e5f, + 0x10e7f, 0x10fff, + 0x1104e, 0x11051, + 0x11070, 0x1107e, + 0x110c2, 0x110cf, + 0x110e9, 0x110ef, + 0x110fa, 0x110ff, + 0x11135, 0x11135, + 0x11144, 0x1114f, + 0x11177, 0x1117f, + 0x111c9, 0x111cc, + 0x111ce, 0x111cf, + 0x111db, 0x111e0, + 0x111f5, 0x111ff, + 0x11212, 0x11212, + 0x1123e, 0x112af, + 0x112eb, 0x112ef, + 0x112fa, 0x11300, + 0x11304, 0x11304, + 0x1130d, 0x1130e, + 0x11311, 0x11312, + 0x11329, 0x11329, + 0x11331, 0x11331, + 0x11334, 0x11334, + 0x1133a, 0x1133b, + 0x11345, 0x11346, + 0x11349, 0x1134a, + 0x1134e, 0x11356, + 0x11358, 0x1135c, + 0x11364, 0x11365, + 0x1136d, 0x1136f, + 0x11375, 0x1147f, + 0x114c8, 0x114cf, + 0x114da, 0x1157f, + 0x115b6, 0x115b7, + 0x115ca, 0x115ff, + 0x11645, 0x1164f, + 0x1165a, 0x1167f, + 0x116b8, 0x116bf, + 0x116ca, 0x1189f, + 0x118f3, 0x118fe, + 0x11900, 0x11abf, + 0x11af9, 0x11fff, + 0x12399, 0x123ff, + 0x1246f, 0x1246f, + 0x12475, 0x12fff, + 0x1342f, 0x167ff, + 0x16a39, 0x16a3f, + 0x16a5f, 0x16a5f, + 0x16a6a, 0x16a6d, + 0x16a70, 0x16acf, + 0x16aee, 0x16aef, + 0x16af6, 0x16aff, + 0x16b46, 0x16b4f, + 0x16b5a, 0x16b5a, + 0x16b62, 0x16b62, + 0x16b78, 0x16b7c, + 0x16b90, 0x16eff, + 0x16f45, 0x16f4f, + 0x16f7f, 0x16f8e, + 0x16fa0, 0x1afff, + 0x1b002, 0x1bbff, + 0x1bc6b, 0x1bc6f, + 0x1bc7d, 0x1bc7f, + 0x1bc89, 0x1bc8f, + 0x1bc9a, 0x1bc9b, + 0x1bca4, 0x1cfff, + 0x1d0f6, 0x1d0ff, + 0x1d127, 0x1d128, + 0x1d1de, 0x1d1ff, + 0x1d246, 0x1d2ff, + 0x1d357, 0x1d35f, + 0x1d372, 0x1d3ff, + 0x1d455, 0x1d455, + 0x1d49d, 0x1d49d, + 0x1d4a0, 0x1d4a1, + 0x1d4a3, 0x1d4a4, + 0x1d4a7, 0x1d4a8, + 0x1d4ad, 0x1d4ad, + 0x1d4ba, 0x1d4ba, + 0x1d4bc, 0x1d4bc, + 0x1d4c4, 0x1d4c4, + 0x1d506, 0x1d506, + 0x1d50b, 0x1d50c, + 0x1d515, 0x1d515, + 0x1d51d, 0x1d51d, + 0x1d53a, 0x1d53a, + 0x1d53f, 0x1d53f, + 0x1d545, 0x1d545, + 0x1d547, 0x1d549, + 0x1d551, 0x1d551, + 0x1d6a6, 0x1d6a7, + 0x1d7cc, 0x1d7cd, + 0x1d800, 0x1e7ff, + 0x1e8c5, 0x1e8c6, + 0x1e8d7, 0x1edff, + 0x1ee04, 0x1ee04, + 0x1ee20, 0x1ee20, + 0x1ee23, 0x1ee23, + 0x1ee25, 0x1ee26, + 0x1ee28, 0x1ee28, + 0x1ee33, 0x1ee33, + 0x1ee38, 0x1ee38, + 0x1ee3a, 0x1ee3a, + 0x1ee3c, 0x1ee41, + 0x1ee43, 0x1ee46, + 0x1ee48, 0x1ee48, + 0x1ee4a, 0x1ee4a, + 0x1ee4c, 0x1ee4c, + 0x1ee50, 0x1ee50, + 0x1ee53, 0x1ee53, + 0x1ee55, 0x1ee56, + 0x1ee58, 0x1ee58, + 0x1ee5a, 0x1ee5a, + 0x1ee5c, 0x1ee5c, + 0x1ee5e, 0x1ee5e, + 0x1ee60, 0x1ee60, + 0x1ee63, 0x1ee63, + 0x1ee65, 0x1ee66, + 0x1ee6b, 0x1ee6b, + 0x1ee73, 0x1ee73, + 0x1ee78, 0x1ee78, + 0x1ee7d, 0x1ee7d, + 0x1ee7f, 0x1ee7f, + 0x1ee8a, 0x1ee8a, + 0x1ee9c, 0x1eea0, + 0x1eea4, 0x1eea4, + 0x1eeaa, 0x1eeaa, + 0x1eebc, 0x1eeef, + 0x1eef2, 0x1efff, + 0x1f02c, 0x1f02f, + 0x1f094, 0x1f09f, + 0x1f0af, 0x1f0b0, + 0x1f0c0, 0x1f0c0, + 0x1f0d0, 0x1f0d0, + 0x1f0f6, 0x1f0ff, + 0x1f10d, 0x1f10f, + 0x1f12f, 0x1f12f, + 0x1f16c, 0x1f16f, + 0x1f19b, 0x1f1e5, + 0x1f203, 0x1f20f, + 0x1f23b, 0x1f23f, + 0x1f249, 0x1f24f, + 0x1f252, 0x1f2ff, + 0x1f32d, 0x1f32f, + 0x1f37e, 0x1f37f, + 0x1f3cf, 0x1f3d3, + 0x1f3f8, 0x1f3ff, + 0x1f4ff, 0x1f4ff, + 0x1f54b, 0x1f54f, + 0x1f57a, 0x1f57a, + 0x1f5a4, 0x1f5a4, + 0x1f643, 0x1f644, + 0x1f6d0, 0x1f6df, + 0x1f6ed, 0x1f6ef, + 0x1f6f4, 0x1f6ff, + 0x1f774, 0x1f77f, + 0x1f7d5, 0x1f7ff, + 0x1f80c, 0x1f80f, + 0x1f848, 0x1f84f, + 0x1f85a, 0x1f85f, + 0x1f888, 0x1f88f, + 0x1f8ae, 0x1ffff, + 0x2a6d7, 0x2a6ff, + 0x2b735, 0x2b73f, + 0x2b81e, 0x2f7ff, + 0x2fa1e, 0xe0000, + 0xe0002, 0xe001f, + 0xe0080, 0xe00ff, + 0xe01f0, 0xeffff, + 0xffffe, 0xfffff, + 0x10fffe, 0x10ffff +}; +UCP_FN(Cn) + +static const unichar ucp_Co_def[] = { + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd +}; +UCP_FN(Co) + +static const unichar ucp_Cs_def[] = { + 0xd800, 0xdfff +}; +UCP_FN(Cs) + +static const unichar ucp_L_def[] = { + 0x41, 0x5a, + 0x61, 0x7a, + 0xaa, 0xaa, + 0xb5, 0xb5, + 0xba, 0xba, + 0xc0, 0xd6, + 0xd8, 0xf6, + 0xf8, 0x2c1, + 0x2c6, 0x2d1, + 0x2e0, 0x2e4, + 0x2ec, 0x2ec, + 0x2ee, 0x2ee, + 0x370, 0x374, + 0x376, 0x377, + 0x37a, 0x37d, + 0x37f, 0x37f, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x3a1, + 0x3a3, 0x3f5, + 0x3f7, 0x481, + 0x48a, 0x52f, + 0x531, 0x556, + 0x559, 0x559, + 0x561, 0x587, + 0x5d0, 0x5ea, + 0x5f0, 0x5f2, + 0x620, 0x64a, + 0x66e, 0x66f, + 0x671, 0x6d3, + 0x6d5, 0x6d5, + 0x6e5, 0x6e6, + 0x6ee, 0x6ef, + 0x6fa, 0x6fc, + 0x6ff, 0x6ff, + 0x710, 0x710, + 0x712, 0x72f, + 0x74d, 0x7a5, + 0x7b1, 0x7b1, + 0x7ca, 0x7ea, + 0x7f4, 0x7f5, + 0x7fa, 0x7fa, + 0x800, 0x815, + 0x81a, 0x81a, + 0x824, 0x824, + 0x828, 0x828, + 0x840, 0x858, + 0x8a0, 0x8b2, + 0x904, 0x939, + 0x93d, 0x93d, + 0x950, 0x950, + 0x958, 0x961, + 0x971, 0x980, + 0x985, 0x98c, + 0x98f, 0x990, + 0x993, 0x9a8, + 0x9aa, 0x9b0, + 0x9b2, 0x9b2, + 0x9b6, 0x9b9, + 0x9bd, 0x9bd, + 0x9ce, 0x9ce, + 0x9dc, 0x9dd, + 0x9df, 0x9e1, + 0x9f0, 0x9f1, + 0xa05, 0xa0a, + 0xa0f, 0xa10, + 0xa13, 0xa28, + 0xa2a, 0xa30, + 0xa32, 0xa33, + 0xa35, 0xa36, + 0xa38, 0xa39, + 0xa59, 0xa5c, + 0xa5e, 0xa5e, + 0xa72, 0xa74, + 0xa85, 0xa8d, + 0xa8f, 0xa91, + 0xa93, 0xaa8, + 0xaaa, 0xab0, + 0xab2, 0xab3, + 0xab5, 0xab9, + 0xabd, 0xabd, + 0xad0, 0xad0, + 0xae0, 0xae1, + 0xb05, 0xb0c, + 0xb0f, 0xb10, + 0xb13, 0xb28, + 0xb2a, 0xb30, + 0xb32, 0xb33, + 0xb35, 0xb39, + 0xb3d, 0xb3d, + 0xb5c, 0xb5d, + 0xb5f, 0xb61, + 0xb71, 0xb71, + 0xb83, 0xb83, + 0xb85, 0xb8a, + 0xb8e, 0xb90, + 0xb92, 0xb95, + 0xb99, 0xb9a, + 0xb9c, 0xb9c, + 0xb9e, 0xb9f, + 0xba3, 0xba4, + 0xba8, 0xbaa, + 0xbae, 0xbb9, + 0xbd0, 0xbd0, + 0xc05, 0xc0c, + 0xc0e, 0xc10, + 0xc12, 0xc28, + 0xc2a, 0xc39, + 0xc3d, 0xc3d, + 0xc58, 0xc59, + 0xc60, 0xc61, + 0xc85, 0xc8c, + 0xc8e, 0xc90, + 0xc92, 0xca8, + 0xcaa, 0xcb3, + 0xcb5, 0xcb9, + 0xcbd, 0xcbd, + 0xcde, 0xcde, + 0xce0, 0xce1, + 0xcf1, 0xcf2, + 0xd05, 0xd0c, + 0xd0e, 0xd10, + 0xd12, 0xd3a, + 0xd3d, 0xd3d, + 0xd4e, 0xd4e, + 0xd60, 0xd61, + 0xd7a, 0xd7f, + 0xd85, 0xd96, + 0xd9a, 0xdb1, + 0xdb3, 0xdbb, + 0xdbd, 0xdbd, + 0xdc0, 0xdc6, + 0xe01, 0xe30, + 0xe32, 0xe33, + 0xe40, 0xe46, + 0xe81, 0xe82, + 0xe84, 0xe84, + 0xe87, 0xe88, + 0xe8a, 0xe8a, + 0xe8d, 0xe8d, + 0xe94, 0xe97, + 0xe99, 0xe9f, + 0xea1, 0xea3, + 0xea5, 0xea5, + 0xea7, 0xea7, + 0xeaa, 0xeab, + 0xead, 0xeb0, + 0xeb2, 0xeb3, + 0xebd, 0xebd, + 0xec0, 0xec4, + 0xec6, 0xec6, + 0xedc, 0xedf, + 0xf00, 0xf00, + 0xf40, 0xf47, + 0xf49, 0xf6c, + 0xf88, 0xf8c, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x10d0, 0x10fa, + 0x10fc, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16f1, 0x16f8, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191e, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1bba, 0x1be5, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1cf5, 0x1cf6, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x209c, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2183, 0x2184, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2cf2, 0x2cf3, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d, + 0x2d30, 0x2d67, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e2f, 0x2e2f, + 0x3005, 0x3006, + 0x3031, 0x3035, + 0x303b, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31ba, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa640, 0xa66e, + 0xa67f, 0xa69d, + 0xa6a0, 0xa6e5, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7f7, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9cf, + 0xa9e0, 0xa9e4, + 0xa9e6, 0xa9ef, + 0xa9fa, 0xa9fe, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa7e, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xaae0, 0xaaea, + 0xaaf2, 0xaaf4, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e, + 0xab30, 0xab5a, + 0xab5c, 0xab5f, + 0xab64, 0xab65, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031f, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10350, 0x10375, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10400, 0x1049d, + 0x10500, 0x10527, + 0x10530, 0x10563, + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10860, 0x10876, + 0x10880, 0x1089e, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10980, 0x109b7, + 0x109be, 0x109bf, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10a80, 0x10a9c, + 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10b80, 0x10b91, + 0x10c00, 0x10c48, + 0x11003, 0x11037, + 0x11083, 0x110af, + 0x110d0, 0x110e8, + 0x11103, 0x11126, + 0x11150, 0x11172, + 0x11176, 0x11176, + 0x11183, 0x111b2, + 0x111c1, 0x111c4, + 0x111da, 0x111da, + 0x11200, 0x11211, + 0x11213, 0x1122b, + 0x112b0, 0x112de, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1133d, 0x1133d, + 0x1135d, 0x11361, + 0x11480, 0x114af, + 0x114c4, 0x114c5, + 0x114c7, 0x114c7, + 0x11580, 0x115ae, + 0x11600, 0x1162f, + 0x11644, 0x11644, + 0x11680, 0x116aa, + 0x118a0, 0x118df, + 0x118ff, 0x118ff, + 0x11ac0, 0x11af8, + 0x12000, 0x12398, + 0x13000, 0x1342e, + 0x16800, 0x16a38, + 0x16a40, 0x16a5e, + 0x16ad0, 0x16aed, + 0x16b00, 0x16b2f, + 0x16b40, 0x16b43, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f, + 0x16f00, 0x16f44, + 0x16f50, 0x16f50, + 0x16f93, 0x16f9f, + 0x1b000, 0x1b001, + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1e800, 0x1e8c4, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee24, 0x1ee24, + 0x1ee27, 0x1ee27, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee39, 0x1ee39, + 0x1ee3b, 0x1ee3b, + 0x1ee42, 0x1ee42, + 0x1ee47, 0x1ee47, + 0x1ee49, 0x1ee49, + 0x1ee4b, 0x1ee4b, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee54, 0x1ee54, + 0x1ee57, 0x1ee57, + 0x1ee59, 0x1ee59, + 0x1ee5b, 0x1ee5b, + 0x1ee5d, 0x1ee5d, + 0x1ee5f, 0x1ee5f, + 0x1ee61, 0x1ee62, + 0x1ee64, 0x1ee64, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee7e, 0x1ee7e, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d +}; +UCP_FN(L) + +static const unichar ucp_L_and_def[] = { + 0x41, 0x5a, + 0x61, 0x7a, + 0xb5, 0xb5, + 0xc0, 0xd6, + 0xd8, 0xf6, + 0xf8, 0x1ba, + 0x1bc, 0x1bf, + 0x1c4, 0x293, + 0x295, 0x2af, + 0x370, 0x373, + 0x376, 0x377, + 0x37b, 0x37d, + 0x37f, 0x37f, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x3a1, + 0x3a3, 0x3f5, + 0x3f7, 0x481, + 0x48a, 0x52f, + 0x531, 0x556, + 0x561, 0x587, + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x1d00, 0x1d2b, + 0x1d6b, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2183, 0x2184, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2c7b, + 0x2c7e, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2cf2, 0x2cf3, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d, + 0xa640, 0xa66d, + 0xa680, 0xa69b, + 0xa722, 0xa76f, + 0xa771, 0xa787, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7fa, 0xa7fa, + 0xab30, 0xab5a, + 0xab64, 0xab65, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0x10400, 0x1044f, + 0x118a0, 0x118df, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb +}; +UCP_FN(L_and) + +static const unichar ucp_Ll_def[] = { + 0x61, 0x7a, + 0xb5, 0xb5, + 0xdf, 0xf6, + 0xf8, 0xff, + 0x101, 0x101, + 0x103, 0x103, + 0x105, 0x105, + 0x107, 0x107, + 0x109, 0x109, + 0x10b, 0x10b, + 0x10d, 0x10d, + 0x10f, 0x10f, + 0x111, 0x111, + 0x113, 0x113, + 0x115, 0x115, + 0x117, 0x117, + 0x119, 0x119, + 0x11b, 0x11b, + 0x11d, 0x11d, + 0x11f, 0x11f, + 0x121, 0x121, + 0x123, 0x123, + 0x125, 0x125, + 0x127, 0x127, + 0x129, 0x129, + 0x12b, 0x12b, + 0x12d, 0x12d, + 0x12f, 0x12f, + 0x131, 0x131, + 0x133, 0x133, + 0x135, 0x135, + 0x137, 0x138, + 0x13a, 0x13a, + 0x13c, 0x13c, + 0x13e, 0x13e, + 0x140, 0x140, + 0x142, 0x142, + 0x144, 0x144, + 0x146, 0x146, + 0x148, 0x149, + 0x14b, 0x14b, + 0x14d, 0x14d, + 0x14f, 0x14f, + 0x151, 0x151, + 0x153, 0x153, + 0x155, 0x155, + 0x157, 0x157, + 0x159, 0x159, + 0x15b, 0x15b, + 0x15d, 0x15d, + 0x15f, 0x15f, + 0x161, 0x161, + 0x163, 0x163, + 0x165, 0x165, + 0x167, 0x167, + 0x169, 0x169, + 0x16b, 0x16b, + 0x16d, 0x16d, + 0x16f, 0x16f, + 0x171, 0x171, + 0x173, 0x173, + 0x175, 0x175, + 0x177, 0x177, + 0x17a, 0x17a, + 0x17c, 0x17c, + 0x17e, 0x180, + 0x183, 0x183, + 0x185, 0x185, + 0x188, 0x188, + 0x18c, 0x18d, + 0x192, 0x192, + 0x195, 0x195, + 0x199, 0x19b, + 0x19e, 0x19e, + 0x1a1, 0x1a1, + 0x1a3, 0x1a3, + 0x1a5, 0x1a5, + 0x1a8, 0x1a8, + 0x1aa, 0x1ab, + 0x1ad, 0x1ad, + 0x1b0, 0x1b0, + 0x1b4, 0x1b4, + 0x1b6, 0x1b6, + 0x1b9, 0x1ba, + 0x1bd, 0x1bf, + 0x1c6, 0x1c6, + 0x1c9, 0x1c9, + 0x1cc, 0x1cc, + 0x1ce, 0x1ce, + 0x1d0, 0x1d0, + 0x1d2, 0x1d2, + 0x1d4, 0x1d4, + 0x1d6, 0x1d6, + 0x1d8, 0x1d8, + 0x1da, 0x1da, + 0x1dc, 0x1dd, + 0x1df, 0x1df, + 0x1e1, 0x1e1, + 0x1e3, 0x1e3, + 0x1e5, 0x1e5, + 0x1e7, 0x1e7, + 0x1e9, 0x1e9, + 0x1eb, 0x1eb, + 0x1ed, 0x1ed, + 0x1ef, 0x1f0, + 0x1f3, 0x1f3, + 0x1f5, 0x1f5, + 0x1f9, 0x1f9, + 0x1fb, 0x1fb, + 0x1fd, 0x1fd, + 0x1ff, 0x1ff, + 0x201, 0x201, + 0x203, 0x203, + 0x205, 0x205, + 0x207, 0x207, + 0x209, 0x209, + 0x20b, 0x20b, + 0x20d, 0x20d, + 0x20f, 0x20f, + 0x211, 0x211, + 0x213, 0x213, + 0x215, 0x215, + 0x217, 0x217, + 0x219, 0x219, + 0x21b, 0x21b, + 0x21d, 0x21d, + 0x21f, 0x21f, + 0x221, 0x221, + 0x223, 0x223, + 0x225, 0x225, + 0x227, 0x227, + 0x229, 0x229, + 0x22b, 0x22b, + 0x22d, 0x22d, + 0x22f, 0x22f, + 0x231, 0x231, + 0x233, 0x239, + 0x23c, 0x23c, + 0x23f, 0x240, + 0x242, 0x242, + 0x247, 0x247, + 0x249, 0x249, + 0x24b, 0x24b, + 0x24d, 0x24d, + 0x24f, 0x293, + 0x295, 0x2af, + 0x371, 0x371, + 0x373, 0x373, + 0x377, 0x377, + 0x37b, 0x37d, + 0x390, 0x390, + 0x3ac, 0x3ce, + 0x3d0, 0x3d1, + 0x3d5, 0x3d7, + 0x3d9, 0x3d9, + 0x3db, 0x3db, + 0x3dd, 0x3dd, + 0x3df, 0x3df, + 0x3e1, 0x3e1, + 0x3e3, 0x3e3, + 0x3e5, 0x3e5, + 0x3e7, 0x3e7, + 0x3e9, 0x3e9, + 0x3eb, 0x3eb, + 0x3ed, 0x3ed, + 0x3ef, 0x3f3, + 0x3f5, 0x3f5, + 0x3f8, 0x3f8, + 0x3fb, 0x3fc, + 0x430, 0x45f, + 0x461, 0x461, + 0x463, 0x463, + 0x465, 0x465, + 0x467, 0x467, + 0x469, 0x469, + 0x46b, 0x46b, + 0x46d, 0x46d, + 0x46f, 0x46f, + 0x471, 0x471, + 0x473, 0x473, + 0x475, 0x475, + 0x477, 0x477, + 0x479, 0x479, + 0x47b, 0x47b, + 0x47d, 0x47d, + 0x47f, 0x47f, + 0x481, 0x481, + 0x48b, 0x48b, + 0x48d, 0x48d, + 0x48f, 0x48f, + 0x491, 0x491, + 0x493, 0x493, + 0x495, 0x495, + 0x497, 0x497, + 0x499, 0x499, + 0x49b, 0x49b, + 0x49d, 0x49d, + 0x49f, 0x49f, + 0x4a1, 0x4a1, + 0x4a3, 0x4a3, + 0x4a5, 0x4a5, + 0x4a7, 0x4a7, + 0x4a9, 0x4a9, + 0x4ab, 0x4ab, + 0x4ad, 0x4ad, + 0x4af, 0x4af, + 0x4b1, 0x4b1, + 0x4b3, 0x4b3, + 0x4b5, 0x4b5, + 0x4b7, 0x4b7, + 0x4b9, 0x4b9, + 0x4bb, 0x4bb, + 0x4bd, 0x4bd, + 0x4bf, 0x4bf, + 0x4c2, 0x4c2, + 0x4c4, 0x4c4, + 0x4c6, 0x4c6, + 0x4c8, 0x4c8, + 0x4ca, 0x4ca, + 0x4cc, 0x4cc, + 0x4ce, 0x4cf, + 0x4d1, 0x4d1, + 0x4d3, 0x4d3, + 0x4d5, 0x4d5, + 0x4d7, 0x4d7, + 0x4d9, 0x4d9, + 0x4db, 0x4db, + 0x4dd, 0x4dd, + 0x4df, 0x4df, + 0x4e1, 0x4e1, + 0x4e3, 0x4e3, + 0x4e5, 0x4e5, + 0x4e7, 0x4e7, + 0x4e9, 0x4e9, + 0x4eb, 0x4eb, + 0x4ed, 0x4ed, + 0x4ef, 0x4ef, + 0x4f1, 0x4f1, + 0x4f3, 0x4f3, + 0x4f5, 0x4f5, + 0x4f7, 0x4f7, + 0x4f9, 0x4f9, + 0x4fb, 0x4fb, + 0x4fd, 0x4fd, + 0x4ff, 0x4ff, + 0x501, 0x501, + 0x503, 0x503, + 0x505, 0x505, + 0x507, 0x507, + 0x509, 0x509, + 0x50b, 0x50b, + 0x50d, 0x50d, + 0x50f, 0x50f, + 0x511, 0x511, + 0x513, 0x513, + 0x515, 0x515, + 0x517, 0x517, + 0x519, 0x519, + 0x51b, 0x51b, + 0x51d, 0x51d, + 0x51f, 0x51f, + 0x521, 0x521, + 0x523, 0x523, + 0x525, 0x525, + 0x527, 0x527, + 0x529, 0x529, + 0x52b, 0x52b, + 0x52d, 0x52d, + 0x52f, 0x52f, + 0x561, 0x587, + 0x1d00, 0x1d2b, + 0x1d6b, 0x1d77, + 0x1d79, 0x1d9a, + 0x1e01, 0x1e01, + 0x1e03, 0x1e03, + 0x1e05, 0x1e05, + 0x1e07, 0x1e07, + 0x1e09, 0x1e09, + 0x1e0b, 0x1e0b, + 0x1e0d, 0x1e0d, + 0x1e0f, 0x1e0f, + 0x1e11, 0x1e11, + 0x1e13, 0x1e13, + 0x1e15, 0x1e15, + 0x1e17, 0x1e17, + 0x1e19, 0x1e19, + 0x1e1b, 0x1e1b, + 0x1e1d, 0x1e1d, + 0x1e1f, 0x1e1f, + 0x1e21, 0x1e21, + 0x1e23, 0x1e23, + 0x1e25, 0x1e25, + 0x1e27, 0x1e27, + 0x1e29, 0x1e29, + 0x1e2b, 0x1e2b, + 0x1e2d, 0x1e2d, + 0x1e2f, 0x1e2f, + 0x1e31, 0x1e31, + 0x1e33, 0x1e33, + 0x1e35, 0x1e35, + 0x1e37, 0x1e37, + 0x1e39, 0x1e39, + 0x1e3b, 0x1e3b, + 0x1e3d, 0x1e3d, + 0x1e3f, 0x1e3f, + 0x1e41, 0x1e41, + 0x1e43, 0x1e43, + 0x1e45, 0x1e45, + 0x1e47, 0x1e47, + 0x1e49, 0x1e49, + 0x1e4b, 0x1e4b, + 0x1e4d, 0x1e4d, + 0x1e4f, 0x1e4f, + 0x1e51, 0x1e51, + 0x1e53, 0x1e53, + 0x1e55, 0x1e55, + 0x1e57, 0x1e57, + 0x1e59, 0x1e59, + 0x1e5b, 0x1e5b, + 0x1e5d, 0x1e5d, + 0x1e5f, 0x1e5f, + 0x1e61, 0x1e61, + 0x1e63, 0x1e63, + 0x1e65, 0x1e65, + 0x1e67, 0x1e67, + 0x1e69, 0x1e69, + 0x1e6b, 0x1e6b, + 0x1e6d, 0x1e6d, + 0x1e6f, 0x1e6f, + 0x1e71, 0x1e71, + 0x1e73, 0x1e73, + 0x1e75, 0x1e75, + 0x1e77, 0x1e77, + 0x1e79, 0x1e79, + 0x1e7b, 0x1e7b, + 0x1e7d, 0x1e7d, + 0x1e7f, 0x1e7f, + 0x1e81, 0x1e81, + 0x1e83, 0x1e83, + 0x1e85, 0x1e85, + 0x1e87, 0x1e87, + 0x1e89, 0x1e89, + 0x1e8b, 0x1e8b, + 0x1e8d, 0x1e8d, + 0x1e8f, 0x1e8f, + 0x1e91, 0x1e91, + 0x1e93, 0x1e93, + 0x1e95, 0x1e9d, + 0x1e9f, 0x1e9f, + 0x1ea1, 0x1ea1, + 0x1ea3, 0x1ea3, + 0x1ea5, 0x1ea5, + 0x1ea7, 0x1ea7, + 0x1ea9, 0x1ea9, + 0x1eab, 0x1eab, + 0x1ead, 0x1ead, + 0x1eaf, 0x1eaf, + 0x1eb1, 0x1eb1, + 0x1eb3, 0x1eb3, + 0x1eb5, 0x1eb5, + 0x1eb7, 0x1eb7, + 0x1eb9, 0x1eb9, + 0x1ebb, 0x1ebb, + 0x1ebd, 0x1ebd, + 0x1ebf, 0x1ebf, + 0x1ec1, 0x1ec1, + 0x1ec3, 0x1ec3, + 0x1ec5, 0x1ec5, + 0x1ec7, 0x1ec7, + 0x1ec9, 0x1ec9, + 0x1ecb, 0x1ecb, + 0x1ecd, 0x1ecd, + 0x1ecf, 0x1ecf, + 0x1ed1, 0x1ed1, + 0x1ed3, 0x1ed3, + 0x1ed5, 0x1ed5, + 0x1ed7, 0x1ed7, + 0x1ed9, 0x1ed9, + 0x1edb, 0x1edb, + 0x1edd, 0x1edd, + 0x1edf, 0x1edf, + 0x1ee1, 0x1ee1, + 0x1ee3, 0x1ee3, + 0x1ee5, 0x1ee5, + 0x1ee7, 0x1ee7, + 0x1ee9, 0x1ee9, + 0x1eeb, 0x1eeb, + 0x1eed, 0x1eed, + 0x1eef, 0x1eef, + 0x1ef1, 0x1ef1, + 0x1ef3, 0x1ef3, + 0x1ef5, 0x1ef5, + 0x1ef7, 0x1ef7, + 0x1ef9, 0x1ef9, + 0x1efb, 0x1efb, + 0x1efd, 0x1efd, + 0x1eff, 0x1f07, + 0x1f10, 0x1f15, + 0x1f20, 0x1f27, + 0x1f30, 0x1f37, + 0x1f40, 0x1f45, + 0x1f50, 0x1f57, + 0x1f60, 0x1f67, + 0x1f70, 0x1f7d, + 0x1f80, 0x1f87, + 0x1f90, 0x1f97, + 0x1fa0, 0x1fa7, + 0x1fb0, 0x1fb4, + 0x1fb6, 0x1fb7, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fc7, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fd7, + 0x1fe0, 0x1fe7, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ff7, + 0x210a, 0x210a, + 0x210e, 0x210f, + 0x2113, 0x2113, + 0x212f, 0x212f, + 0x2134, 0x2134, + 0x2139, 0x2139, + 0x213c, 0x213d, + 0x2146, 0x2149, + 0x214e, 0x214e, + 0x2184, 0x2184, + 0x2c30, 0x2c5e, + 0x2c61, 0x2c61, + 0x2c65, 0x2c66, + 0x2c68, 0x2c68, + 0x2c6a, 0x2c6a, + 0x2c6c, 0x2c6c, + 0x2c71, 0x2c71, + 0x2c73, 0x2c74, + 0x2c76, 0x2c7b, + 0x2c81, 0x2c81, + 0x2c83, 0x2c83, + 0x2c85, 0x2c85, + 0x2c87, 0x2c87, + 0x2c89, 0x2c89, + 0x2c8b, 0x2c8b, + 0x2c8d, 0x2c8d, + 0x2c8f, 0x2c8f, + 0x2c91, 0x2c91, + 0x2c93, 0x2c93, + 0x2c95, 0x2c95, + 0x2c97, 0x2c97, + 0x2c99, 0x2c99, + 0x2c9b, 0x2c9b, + 0x2c9d, 0x2c9d, + 0x2c9f, 0x2c9f, + 0x2ca1, 0x2ca1, + 0x2ca3, 0x2ca3, + 0x2ca5, 0x2ca5, + 0x2ca7, 0x2ca7, + 0x2ca9, 0x2ca9, + 0x2cab, 0x2cab, + 0x2cad, 0x2cad, + 0x2caf, 0x2caf, + 0x2cb1, 0x2cb1, + 0x2cb3, 0x2cb3, + 0x2cb5, 0x2cb5, + 0x2cb7, 0x2cb7, + 0x2cb9, 0x2cb9, + 0x2cbb, 0x2cbb, + 0x2cbd, 0x2cbd, + 0x2cbf, 0x2cbf, + 0x2cc1, 0x2cc1, + 0x2cc3, 0x2cc3, + 0x2cc5, 0x2cc5, + 0x2cc7, 0x2cc7, + 0x2cc9, 0x2cc9, + 0x2ccb, 0x2ccb, + 0x2ccd, 0x2ccd, + 0x2ccf, 0x2ccf, + 0x2cd1, 0x2cd1, + 0x2cd3, 0x2cd3, + 0x2cd5, 0x2cd5, + 0x2cd7, 0x2cd7, + 0x2cd9, 0x2cd9, + 0x2cdb, 0x2cdb, + 0x2cdd, 0x2cdd, + 0x2cdf, 0x2cdf, + 0x2ce1, 0x2ce1, + 0x2ce3, 0x2ce4, + 0x2cec, 0x2cec, + 0x2cee, 0x2cee, + 0x2cf3, 0x2cf3, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d, + 0xa641, 0xa641, + 0xa643, 0xa643, + 0xa645, 0xa645, + 0xa647, 0xa647, + 0xa649, 0xa649, + 0xa64b, 0xa64b, + 0xa64d, 0xa64d, + 0xa64f, 0xa64f, + 0xa651, 0xa651, + 0xa653, 0xa653, + 0xa655, 0xa655, + 0xa657, 0xa657, + 0xa659, 0xa659, + 0xa65b, 0xa65b, + 0xa65d, 0xa65d, + 0xa65f, 0xa65f, + 0xa661, 0xa661, + 0xa663, 0xa663, + 0xa665, 0xa665, + 0xa667, 0xa667, + 0xa669, 0xa669, + 0xa66b, 0xa66b, + 0xa66d, 0xa66d, + 0xa681, 0xa681, + 0xa683, 0xa683, + 0xa685, 0xa685, + 0xa687, 0xa687, + 0xa689, 0xa689, + 0xa68b, 0xa68b, + 0xa68d, 0xa68d, + 0xa68f, 0xa68f, + 0xa691, 0xa691, + 0xa693, 0xa693, + 0xa695, 0xa695, + 0xa697, 0xa697, + 0xa699, 0xa699, + 0xa69b, 0xa69b, + 0xa723, 0xa723, + 0xa725, 0xa725, + 0xa727, 0xa727, + 0xa729, 0xa729, + 0xa72b, 0xa72b, + 0xa72d, 0xa72d, + 0xa72f, 0xa731, + 0xa733, 0xa733, + 0xa735, 0xa735, + 0xa737, 0xa737, + 0xa739, 0xa739, + 0xa73b, 0xa73b, + 0xa73d, 0xa73d, + 0xa73f, 0xa73f, + 0xa741, 0xa741, + 0xa743, 0xa743, + 0xa745, 0xa745, + 0xa747, 0xa747, + 0xa749, 0xa749, + 0xa74b, 0xa74b, + 0xa74d, 0xa74d, + 0xa74f, 0xa74f, + 0xa751, 0xa751, + 0xa753, 0xa753, + 0xa755, 0xa755, + 0xa757, 0xa757, + 0xa759, 0xa759, + 0xa75b, 0xa75b, + 0xa75d, 0xa75d, + 0xa75f, 0xa75f, + 0xa761, 0xa761, + 0xa763, 0xa763, + 0xa765, 0xa765, + 0xa767, 0xa767, + 0xa769, 0xa769, + 0xa76b, 0xa76b, + 0xa76d, 0xa76d, + 0xa76f, 0xa76f, + 0xa771, 0xa778, + 0xa77a, 0xa77a, + 0xa77c, 0xa77c, + 0xa77f, 0xa77f, + 0xa781, 0xa781, + 0xa783, 0xa783, + 0xa785, 0xa785, + 0xa787, 0xa787, + 0xa78c, 0xa78c, + 0xa78e, 0xa78e, + 0xa791, 0xa791, + 0xa793, 0xa795, + 0xa797, 0xa797, + 0xa799, 0xa799, + 0xa79b, 0xa79b, + 0xa79d, 0xa79d, + 0xa79f, 0xa79f, + 0xa7a1, 0xa7a1, + 0xa7a3, 0xa7a3, + 0xa7a5, 0xa7a5, + 0xa7a7, 0xa7a7, + 0xa7a9, 0xa7a9, + 0xa7fa, 0xa7fa, + 0xab30, 0xab5a, + 0xab64, 0xab65, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xff41, 0xff5a, + 0x10428, 0x1044f, + 0x118c0, 0x118df, + 0x1d41a, 0x1d433, + 0x1d44e, 0x1d454, + 0x1d456, 0x1d467, + 0x1d482, 0x1d49b, + 0x1d4b6, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d4cf, + 0x1d4ea, 0x1d503, + 0x1d51e, 0x1d537, + 0x1d552, 0x1d56b, + 0x1d586, 0x1d59f, + 0x1d5ba, 0x1d5d3, + 0x1d5ee, 0x1d607, + 0x1d622, 0x1d63b, + 0x1d656, 0x1d66f, + 0x1d68a, 0x1d6a5, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6e1, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d71b, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d755, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d78f, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7c9, + 0x1d7cb, 0x1d7cb +}; +UCP_FN(Ll) + +static const unichar ucp_Lm_def[] = { + 0x2b0, 0x2c1, + 0x2c6, 0x2d1, + 0x2e0, 0x2e4, + 0x2ec, 0x2ec, + 0x2ee, 0x2ee, + 0x374, 0x374, + 0x37a, 0x37a, + 0x559, 0x559, + 0x640, 0x640, + 0x6e5, 0x6e6, + 0x7f4, 0x7f5, + 0x7fa, 0x7fa, + 0x81a, 0x81a, + 0x824, 0x824, + 0x828, 0x828, + 0x971, 0x971, + 0xe46, 0xe46, + 0xec6, 0xec6, + 0x10fc, 0x10fc, + 0x17d7, 0x17d7, + 0x1843, 0x1843, + 0x1aa7, 0x1aa7, + 0x1c78, 0x1c7d, + 0x1d2c, 0x1d6a, + 0x1d78, 0x1d78, + 0x1d9b, 0x1dbf, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x209c, + 0x2c7c, 0x2c7d, + 0x2d6f, 0x2d6f, + 0x2e2f, 0x2e2f, + 0x3005, 0x3005, + 0x3031, 0x3035, + 0x303b, 0x303b, + 0x309d, 0x309e, + 0x30fc, 0x30fe, + 0xa015, 0xa015, + 0xa4f8, 0xa4fd, + 0xa60c, 0xa60c, + 0xa67f, 0xa67f, + 0xa69c, 0xa69d, + 0xa717, 0xa71f, + 0xa770, 0xa770, + 0xa788, 0xa788, + 0xa7f8, 0xa7f9, + 0xa9cf, 0xa9cf, + 0xa9e6, 0xa9e6, + 0xaa70, 0xaa70, + 0xaadd, 0xaadd, + 0xaaf3, 0xaaf4, + 0xab5c, 0xab5f, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0x16b40, 0x16b43, + 0x16f93, 0x16f9f +}; +UCP_FN(Lm) + +static const unichar ucp_Lo_def[] = { + 0xaa, 0xaa, + 0xba, 0xba, + 0x1bb, 0x1bb, + 0x1c0, 0x1c3, + 0x294, 0x294, + 0x5d0, 0x5ea, + 0x5f0, 0x5f2, + 0x620, 0x63f, + 0x641, 0x64a, + 0x66e, 0x66f, + 0x671, 0x6d3, + 0x6d5, 0x6d5, + 0x6ee, 0x6ef, + 0x6fa, 0x6fc, + 0x6ff, 0x6ff, + 0x710, 0x710, + 0x712, 0x72f, + 0x74d, 0x7a5, + 0x7b1, 0x7b1, + 0x7ca, 0x7ea, + 0x800, 0x815, + 0x840, 0x858, + 0x8a0, 0x8b2, + 0x904, 0x939, + 0x93d, 0x93d, + 0x950, 0x950, + 0x958, 0x961, + 0x972, 0x980, + 0x985, 0x98c, + 0x98f, 0x990, + 0x993, 0x9a8, + 0x9aa, 0x9b0, + 0x9b2, 0x9b2, + 0x9b6, 0x9b9, + 0x9bd, 0x9bd, + 0x9ce, 0x9ce, + 0x9dc, 0x9dd, + 0x9df, 0x9e1, + 0x9f0, 0x9f1, + 0xa05, 0xa0a, + 0xa0f, 0xa10, + 0xa13, 0xa28, + 0xa2a, 0xa30, + 0xa32, 0xa33, + 0xa35, 0xa36, + 0xa38, 0xa39, + 0xa59, 0xa5c, + 0xa5e, 0xa5e, + 0xa72, 0xa74, + 0xa85, 0xa8d, + 0xa8f, 0xa91, + 0xa93, 0xaa8, + 0xaaa, 0xab0, + 0xab2, 0xab3, + 0xab5, 0xab9, + 0xabd, 0xabd, + 0xad0, 0xad0, + 0xae0, 0xae1, + 0xb05, 0xb0c, + 0xb0f, 0xb10, + 0xb13, 0xb28, + 0xb2a, 0xb30, + 0xb32, 0xb33, + 0xb35, 0xb39, + 0xb3d, 0xb3d, + 0xb5c, 0xb5d, + 0xb5f, 0xb61, + 0xb71, 0xb71, + 0xb83, 0xb83, + 0xb85, 0xb8a, + 0xb8e, 0xb90, + 0xb92, 0xb95, + 0xb99, 0xb9a, + 0xb9c, 0xb9c, + 0xb9e, 0xb9f, + 0xba3, 0xba4, + 0xba8, 0xbaa, + 0xbae, 0xbb9, + 0xbd0, 0xbd0, + 0xc05, 0xc0c, + 0xc0e, 0xc10, + 0xc12, 0xc28, + 0xc2a, 0xc39, + 0xc3d, 0xc3d, + 0xc58, 0xc59, + 0xc60, 0xc61, + 0xc85, 0xc8c, + 0xc8e, 0xc90, + 0xc92, 0xca8, + 0xcaa, 0xcb3, + 0xcb5, 0xcb9, + 0xcbd, 0xcbd, + 0xcde, 0xcde, + 0xce0, 0xce1, + 0xcf1, 0xcf2, + 0xd05, 0xd0c, + 0xd0e, 0xd10, + 0xd12, 0xd3a, + 0xd3d, 0xd3d, + 0xd4e, 0xd4e, + 0xd60, 0xd61, + 0xd7a, 0xd7f, + 0xd85, 0xd96, + 0xd9a, 0xdb1, + 0xdb3, 0xdbb, + 0xdbd, 0xdbd, + 0xdc0, 0xdc6, + 0xe01, 0xe30, + 0xe32, 0xe33, + 0xe40, 0xe45, + 0xe81, 0xe82, + 0xe84, 0xe84, + 0xe87, 0xe88, + 0xe8a, 0xe8a, + 0xe8d, 0xe8d, + 0xe94, 0xe97, + 0xe99, 0xe9f, + 0xea1, 0xea3, + 0xea5, 0xea5, + 0xea7, 0xea7, + 0xeaa, 0xeab, + 0xead, 0xeb0, + 0xeb2, 0xeb3, + 0xebd, 0xebd, + 0xec0, 0xec4, + 0xedc, 0xedf, + 0xf00, 0xf00, + 0xf40, 0xf47, + 0xf49, 0xf6c, + 0xf88, 0xf8c, + 0x1000, 0x102a, + 0x103f, 0x103f, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x10d0, 0x10fa, + 0x10fd, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16f1, 0x16f8, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17dc, 0x17dc, + 0x1820, 0x1842, + 0x1844, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191e, + 0x1950, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b83, 0x1ba0, + 0x1bae, 0x1baf, + 0x1bba, 0x1be5, + 0x1c00, 0x1c23, + 0x1c4d, 0x1c4f, + 0x1c5a, 0x1c77, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1cf5, 0x1cf6, + 0x2135, 0x2138, + 0x2d30, 0x2d67, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x3006, 0x3006, + 0x303c, 0x303c, + 0x3041, 0x3096, + 0x309f, 0x309f, + 0x30a1, 0x30fa, + 0x30ff, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x31a0, 0x31ba, + 0x31f0, 0x31ff, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xa000, 0xa014, + 0xa016, 0xa48c, + 0xa4d0, 0xa4f7, + 0xa500, 0xa60b, + 0xa610, 0xa61f, + 0xa62a, 0xa62b, + 0xa66e, 0xa66e, + 0xa6a0, 0xa6e5, + 0xa7f7, 0xa7f7, + 0xa7fb, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa90a, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9e0, 0xa9e4, + 0xa9e7, 0xa9ef, + 0xa9fa, 0xa9fe, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa60, 0xaa6f, + 0xaa71, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa7e, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadc, + 0xaae0, 0xaaea, + 0xaaf2, 0xaaf2, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e, + 0xabc0, 0xabe2, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x10300, 0x1031f, + 0x10330, 0x10340, + 0x10342, 0x10349, + 0x10350, 0x10375, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x10450, 0x1049d, + 0x10500, 0x10527, + 0x10530, 0x10563, + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10860, 0x10876, + 0x10880, 0x1089e, + 0x10900, 0x10915, + 0x10920, 0x10939, + 0x10980, 0x109b7, + 0x109be, 0x109bf, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a60, 0x10a7c, + 0x10a80, 0x10a9c, + 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b60, 0x10b72, + 0x10b80, 0x10b91, + 0x10c00, 0x10c48, + 0x11003, 0x11037, + 0x11083, 0x110af, + 0x110d0, 0x110e8, + 0x11103, 0x11126, + 0x11150, 0x11172, + 0x11176, 0x11176, + 0x11183, 0x111b2, + 0x111c1, 0x111c4, + 0x111da, 0x111da, + 0x11200, 0x11211, + 0x11213, 0x1122b, + 0x112b0, 0x112de, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1133d, 0x1133d, + 0x1135d, 0x11361, + 0x11480, 0x114af, + 0x114c4, 0x114c5, + 0x114c7, 0x114c7, + 0x11580, 0x115ae, + 0x11600, 0x1162f, + 0x11644, 0x11644, + 0x11680, 0x116aa, + 0x118ff, 0x118ff, + 0x11ac0, 0x11af8, + 0x12000, 0x12398, + 0x13000, 0x1342e, + 0x16800, 0x16a38, + 0x16a40, 0x16a5e, + 0x16ad0, 0x16aed, + 0x16b00, 0x16b2f, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f, + 0x16f00, 0x16f44, + 0x16f50, 0x16f50, + 0x1b000, 0x1b001, + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1e800, 0x1e8c4, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee24, 0x1ee24, + 0x1ee27, 0x1ee27, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee39, 0x1ee39, + 0x1ee3b, 0x1ee3b, + 0x1ee42, 0x1ee42, + 0x1ee47, 0x1ee47, + 0x1ee49, 0x1ee49, + 0x1ee4b, 0x1ee4b, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee54, 0x1ee54, + 0x1ee57, 0x1ee57, + 0x1ee59, 0x1ee59, + 0x1ee5b, 0x1ee5b, + 0x1ee5d, 0x1ee5d, + 0x1ee5f, 0x1ee5f, + 0x1ee61, 0x1ee62, + 0x1ee64, 0x1ee64, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee7e, 0x1ee7e, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d +}; +UCP_FN(Lo) + +static const unichar ucp_Lt_def[] = { + 0x1c5, 0x1c5, + 0x1c8, 0x1c8, + 0x1cb, 0x1cb, + 0x1f2, 0x1f2, + 0x1f88, 0x1f8f, + 0x1f98, 0x1f9f, + 0x1fa8, 0x1faf, + 0x1fbc, 0x1fbc, + 0x1fcc, 0x1fcc, + 0x1ffc, 0x1ffc +}; +UCP_FN(Lt) + +static const unichar ucp_Lu_def[] = { + 0x41, 0x5a, + 0xc0, 0xd6, + 0xd8, 0xde, + 0x100, 0x100, + 0x102, 0x102, + 0x104, 0x104, + 0x106, 0x106, + 0x108, 0x108, + 0x10a, 0x10a, + 0x10c, 0x10c, + 0x10e, 0x10e, + 0x110, 0x110, + 0x112, 0x112, + 0x114, 0x114, + 0x116, 0x116, + 0x118, 0x118, + 0x11a, 0x11a, + 0x11c, 0x11c, + 0x11e, 0x11e, + 0x120, 0x120, + 0x122, 0x122, + 0x124, 0x124, + 0x126, 0x126, + 0x128, 0x128, + 0x12a, 0x12a, + 0x12c, 0x12c, + 0x12e, 0x12e, + 0x130, 0x130, + 0x132, 0x132, + 0x134, 0x134, + 0x136, 0x136, + 0x139, 0x139, + 0x13b, 0x13b, + 0x13d, 0x13d, + 0x13f, 0x13f, + 0x141, 0x141, + 0x143, 0x143, + 0x145, 0x145, + 0x147, 0x147, + 0x14a, 0x14a, + 0x14c, 0x14c, + 0x14e, 0x14e, + 0x150, 0x150, + 0x152, 0x152, + 0x154, 0x154, + 0x156, 0x156, + 0x158, 0x158, + 0x15a, 0x15a, + 0x15c, 0x15c, + 0x15e, 0x15e, + 0x160, 0x160, + 0x162, 0x162, + 0x164, 0x164, + 0x166, 0x166, + 0x168, 0x168, + 0x16a, 0x16a, + 0x16c, 0x16c, + 0x16e, 0x16e, + 0x170, 0x170, + 0x172, 0x172, + 0x174, 0x174, + 0x176, 0x176, + 0x178, 0x179, + 0x17b, 0x17b, + 0x17d, 0x17d, + 0x181, 0x182, + 0x184, 0x184, + 0x186, 0x187, + 0x189, 0x18b, + 0x18e, 0x191, + 0x193, 0x194, + 0x196, 0x198, + 0x19c, 0x19d, + 0x19f, 0x1a0, + 0x1a2, 0x1a2, + 0x1a4, 0x1a4, + 0x1a6, 0x1a7, + 0x1a9, 0x1a9, + 0x1ac, 0x1ac, + 0x1ae, 0x1af, + 0x1b1, 0x1b3, + 0x1b5, 0x1b5, + 0x1b7, 0x1b8, + 0x1bc, 0x1bc, + 0x1c4, 0x1c4, + 0x1c7, 0x1c7, + 0x1ca, 0x1ca, + 0x1cd, 0x1cd, + 0x1cf, 0x1cf, + 0x1d1, 0x1d1, + 0x1d3, 0x1d3, + 0x1d5, 0x1d5, + 0x1d7, 0x1d7, + 0x1d9, 0x1d9, + 0x1db, 0x1db, + 0x1de, 0x1de, + 0x1e0, 0x1e0, + 0x1e2, 0x1e2, + 0x1e4, 0x1e4, + 0x1e6, 0x1e6, + 0x1e8, 0x1e8, + 0x1ea, 0x1ea, + 0x1ec, 0x1ec, + 0x1ee, 0x1ee, + 0x1f1, 0x1f1, + 0x1f4, 0x1f4, + 0x1f6, 0x1f8, + 0x1fa, 0x1fa, + 0x1fc, 0x1fc, + 0x1fe, 0x1fe, + 0x200, 0x200, + 0x202, 0x202, + 0x204, 0x204, + 0x206, 0x206, + 0x208, 0x208, + 0x20a, 0x20a, + 0x20c, 0x20c, + 0x20e, 0x20e, + 0x210, 0x210, + 0x212, 0x212, + 0x214, 0x214, + 0x216, 0x216, + 0x218, 0x218, + 0x21a, 0x21a, + 0x21c, 0x21c, + 0x21e, 0x21e, + 0x220, 0x220, + 0x222, 0x222, + 0x224, 0x224, + 0x226, 0x226, + 0x228, 0x228, + 0x22a, 0x22a, + 0x22c, 0x22c, + 0x22e, 0x22e, + 0x230, 0x230, + 0x232, 0x232, + 0x23a, 0x23b, + 0x23d, 0x23e, + 0x241, 0x241, + 0x243, 0x246, + 0x248, 0x248, + 0x24a, 0x24a, + 0x24c, 0x24c, + 0x24e, 0x24e, + 0x370, 0x370, + 0x372, 0x372, + 0x376, 0x376, + 0x37f, 0x37f, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x38f, + 0x391, 0x3a1, + 0x3a3, 0x3ab, + 0x3cf, 0x3cf, + 0x3d2, 0x3d4, + 0x3d8, 0x3d8, + 0x3da, 0x3da, + 0x3dc, 0x3dc, + 0x3de, 0x3de, + 0x3e0, 0x3e0, + 0x3e2, 0x3e2, + 0x3e4, 0x3e4, + 0x3e6, 0x3e6, + 0x3e8, 0x3e8, + 0x3ea, 0x3ea, + 0x3ec, 0x3ec, + 0x3ee, 0x3ee, + 0x3f4, 0x3f4, + 0x3f7, 0x3f7, + 0x3f9, 0x3fa, + 0x3fd, 0x42f, + 0x460, 0x460, + 0x462, 0x462, + 0x464, 0x464, + 0x466, 0x466, + 0x468, 0x468, + 0x46a, 0x46a, + 0x46c, 0x46c, + 0x46e, 0x46e, + 0x470, 0x470, + 0x472, 0x472, + 0x474, 0x474, + 0x476, 0x476, + 0x478, 0x478, + 0x47a, 0x47a, + 0x47c, 0x47c, + 0x47e, 0x47e, + 0x480, 0x480, + 0x48a, 0x48a, + 0x48c, 0x48c, + 0x48e, 0x48e, + 0x490, 0x490, + 0x492, 0x492, + 0x494, 0x494, + 0x496, 0x496, + 0x498, 0x498, + 0x49a, 0x49a, + 0x49c, 0x49c, + 0x49e, 0x49e, + 0x4a0, 0x4a0, + 0x4a2, 0x4a2, + 0x4a4, 0x4a4, + 0x4a6, 0x4a6, + 0x4a8, 0x4a8, + 0x4aa, 0x4aa, + 0x4ac, 0x4ac, + 0x4ae, 0x4ae, + 0x4b0, 0x4b0, + 0x4b2, 0x4b2, + 0x4b4, 0x4b4, + 0x4b6, 0x4b6, + 0x4b8, 0x4b8, + 0x4ba, 0x4ba, + 0x4bc, 0x4bc, + 0x4be, 0x4be, + 0x4c0, 0x4c1, + 0x4c3, 0x4c3, + 0x4c5, 0x4c5, + 0x4c7, 0x4c7, + 0x4c9, 0x4c9, + 0x4cb, 0x4cb, + 0x4cd, 0x4cd, + 0x4d0, 0x4d0, + 0x4d2, 0x4d2, + 0x4d4, 0x4d4, + 0x4d6, 0x4d6, + 0x4d8, 0x4d8, + 0x4da, 0x4da, + 0x4dc, 0x4dc, + 0x4de, 0x4de, + 0x4e0, 0x4e0, + 0x4e2, 0x4e2, + 0x4e4, 0x4e4, + 0x4e6, 0x4e6, + 0x4e8, 0x4e8, + 0x4ea, 0x4ea, + 0x4ec, 0x4ec, + 0x4ee, 0x4ee, + 0x4f0, 0x4f0, + 0x4f2, 0x4f2, + 0x4f4, 0x4f4, + 0x4f6, 0x4f6, + 0x4f8, 0x4f8, + 0x4fa, 0x4fa, + 0x4fc, 0x4fc, + 0x4fe, 0x4fe, + 0x500, 0x500, + 0x502, 0x502, + 0x504, 0x504, + 0x506, 0x506, + 0x508, 0x508, + 0x50a, 0x50a, + 0x50c, 0x50c, + 0x50e, 0x50e, + 0x510, 0x510, + 0x512, 0x512, + 0x514, 0x514, + 0x516, 0x516, + 0x518, 0x518, + 0x51a, 0x51a, + 0x51c, 0x51c, + 0x51e, 0x51e, + 0x520, 0x520, + 0x522, 0x522, + 0x524, 0x524, + 0x526, 0x526, + 0x528, 0x528, + 0x52a, 0x52a, + 0x52c, 0x52c, + 0x52e, 0x52e, + 0x531, 0x556, + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x1e00, 0x1e00, + 0x1e02, 0x1e02, + 0x1e04, 0x1e04, + 0x1e06, 0x1e06, + 0x1e08, 0x1e08, + 0x1e0a, 0x1e0a, + 0x1e0c, 0x1e0c, + 0x1e0e, 0x1e0e, + 0x1e10, 0x1e10, + 0x1e12, 0x1e12, + 0x1e14, 0x1e14, + 0x1e16, 0x1e16, + 0x1e18, 0x1e18, + 0x1e1a, 0x1e1a, + 0x1e1c, 0x1e1c, + 0x1e1e, 0x1e1e, + 0x1e20, 0x1e20, + 0x1e22, 0x1e22, + 0x1e24, 0x1e24, + 0x1e26, 0x1e26, + 0x1e28, 0x1e28, + 0x1e2a, 0x1e2a, + 0x1e2c, 0x1e2c, + 0x1e2e, 0x1e2e, + 0x1e30, 0x1e30, + 0x1e32, 0x1e32, + 0x1e34, 0x1e34, + 0x1e36, 0x1e36, + 0x1e38, 0x1e38, + 0x1e3a, 0x1e3a, + 0x1e3c, 0x1e3c, + 0x1e3e, 0x1e3e, + 0x1e40, 0x1e40, + 0x1e42, 0x1e42, + 0x1e44, 0x1e44, + 0x1e46, 0x1e46, + 0x1e48, 0x1e48, + 0x1e4a, 0x1e4a, + 0x1e4c, 0x1e4c, + 0x1e4e, 0x1e4e, + 0x1e50, 0x1e50, + 0x1e52, 0x1e52, + 0x1e54, 0x1e54, + 0x1e56, 0x1e56, + 0x1e58, 0x1e58, + 0x1e5a, 0x1e5a, + 0x1e5c, 0x1e5c, + 0x1e5e, 0x1e5e, + 0x1e60, 0x1e60, + 0x1e62, 0x1e62, + 0x1e64, 0x1e64, + 0x1e66, 0x1e66, + 0x1e68, 0x1e68, + 0x1e6a, 0x1e6a, + 0x1e6c, 0x1e6c, + 0x1e6e, 0x1e6e, + 0x1e70, 0x1e70, + 0x1e72, 0x1e72, + 0x1e74, 0x1e74, + 0x1e76, 0x1e76, + 0x1e78, 0x1e78, + 0x1e7a, 0x1e7a, + 0x1e7c, 0x1e7c, + 0x1e7e, 0x1e7e, + 0x1e80, 0x1e80, + 0x1e82, 0x1e82, + 0x1e84, 0x1e84, + 0x1e86, 0x1e86, + 0x1e88, 0x1e88, + 0x1e8a, 0x1e8a, + 0x1e8c, 0x1e8c, + 0x1e8e, 0x1e8e, + 0x1e90, 0x1e90, + 0x1e92, 0x1e92, + 0x1e94, 0x1e94, + 0x1e9e, 0x1e9e, + 0x1ea0, 0x1ea0, + 0x1ea2, 0x1ea2, + 0x1ea4, 0x1ea4, + 0x1ea6, 0x1ea6, + 0x1ea8, 0x1ea8, + 0x1eaa, 0x1eaa, + 0x1eac, 0x1eac, + 0x1eae, 0x1eae, + 0x1eb0, 0x1eb0, + 0x1eb2, 0x1eb2, + 0x1eb4, 0x1eb4, + 0x1eb6, 0x1eb6, + 0x1eb8, 0x1eb8, + 0x1eba, 0x1eba, + 0x1ebc, 0x1ebc, + 0x1ebe, 0x1ebe, + 0x1ec0, 0x1ec0, + 0x1ec2, 0x1ec2, + 0x1ec4, 0x1ec4, + 0x1ec6, 0x1ec6, + 0x1ec8, 0x1ec8, + 0x1eca, 0x1eca, + 0x1ecc, 0x1ecc, + 0x1ece, 0x1ece, + 0x1ed0, 0x1ed0, + 0x1ed2, 0x1ed2, + 0x1ed4, 0x1ed4, + 0x1ed6, 0x1ed6, + 0x1ed8, 0x1ed8, + 0x1eda, 0x1eda, + 0x1edc, 0x1edc, + 0x1ede, 0x1ede, + 0x1ee0, 0x1ee0, + 0x1ee2, 0x1ee2, + 0x1ee4, 0x1ee4, + 0x1ee6, 0x1ee6, + 0x1ee8, 0x1ee8, + 0x1eea, 0x1eea, + 0x1eec, 0x1eec, + 0x1eee, 0x1eee, + 0x1ef0, 0x1ef0, + 0x1ef2, 0x1ef2, + 0x1ef4, 0x1ef4, + 0x1ef6, 0x1ef6, + 0x1ef8, 0x1ef8, + 0x1efa, 0x1efa, + 0x1efc, 0x1efc, + 0x1efe, 0x1efe, + 0x1f08, 0x1f0f, + 0x1f18, 0x1f1d, + 0x1f28, 0x1f2f, + 0x1f38, 0x1f3f, + 0x1f48, 0x1f4d, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f5f, + 0x1f68, 0x1f6f, + 0x1fb8, 0x1fbb, + 0x1fc8, 0x1fcb, + 0x1fd8, 0x1fdb, + 0x1fe8, 0x1fec, + 0x1ff8, 0x1ffb, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210b, 0x210d, + 0x2110, 0x2112, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x2130, 0x2133, + 0x213e, 0x213f, + 0x2145, 0x2145, + 0x2183, 0x2183, + 0x2c00, 0x2c2e, + 0x2c60, 0x2c60, + 0x2c62, 0x2c64, + 0x2c67, 0x2c67, + 0x2c69, 0x2c69, + 0x2c6b, 0x2c6b, + 0x2c6d, 0x2c70, + 0x2c72, 0x2c72, + 0x2c75, 0x2c75, + 0x2c7e, 0x2c80, + 0x2c82, 0x2c82, + 0x2c84, 0x2c84, + 0x2c86, 0x2c86, + 0x2c88, 0x2c88, + 0x2c8a, 0x2c8a, + 0x2c8c, 0x2c8c, + 0x2c8e, 0x2c8e, + 0x2c90, 0x2c90, + 0x2c92, 0x2c92, + 0x2c94, 0x2c94, + 0x2c96, 0x2c96, + 0x2c98, 0x2c98, + 0x2c9a, 0x2c9a, + 0x2c9c, 0x2c9c, + 0x2c9e, 0x2c9e, + 0x2ca0, 0x2ca0, + 0x2ca2, 0x2ca2, + 0x2ca4, 0x2ca4, + 0x2ca6, 0x2ca6, + 0x2ca8, 0x2ca8, + 0x2caa, 0x2caa, + 0x2cac, 0x2cac, + 0x2cae, 0x2cae, + 0x2cb0, 0x2cb0, + 0x2cb2, 0x2cb2, + 0x2cb4, 0x2cb4, + 0x2cb6, 0x2cb6, + 0x2cb8, 0x2cb8, + 0x2cba, 0x2cba, + 0x2cbc, 0x2cbc, + 0x2cbe, 0x2cbe, + 0x2cc0, 0x2cc0, + 0x2cc2, 0x2cc2, + 0x2cc4, 0x2cc4, + 0x2cc6, 0x2cc6, + 0x2cc8, 0x2cc8, + 0x2cca, 0x2cca, + 0x2ccc, 0x2ccc, + 0x2cce, 0x2cce, + 0x2cd0, 0x2cd0, + 0x2cd2, 0x2cd2, + 0x2cd4, 0x2cd4, + 0x2cd6, 0x2cd6, + 0x2cd8, 0x2cd8, + 0x2cda, 0x2cda, + 0x2cdc, 0x2cdc, + 0x2cde, 0x2cde, + 0x2ce0, 0x2ce0, + 0x2ce2, 0x2ce2, + 0x2ceb, 0x2ceb, + 0x2ced, 0x2ced, + 0x2cf2, 0x2cf2, + 0xa640, 0xa640, + 0xa642, 0xa642, + 0xa644, 0xa644, + 0xa646, 0xa646, + 0xa648, 0xa648, + 0xa64a, 0xa64a, + 0xa64c, 0xa64c, + 0xa64e, 0xa64e, + 0xa650, 0xa650, + 0xa652, 0xa652, + 0xa654, 0xa654, + 0xa656, 0xa656, + 0xa658, 0xa658, + 0xa65a, 0xa65a, + 0xa65c, 0xa65c, + 0xa65e, 0xa65e, + 0xa660, 0xa660, + 0xa662, 0xa662, + 0xa664, 0xa664, + 0xa666, 0xa666, + 0xa668, 0xa668, + 0xa66a, 0xa66a, + 0xa66c, 0xa66c, + 0xa680, 0xa680, + 0xa682, 0xa682, + 0xa684, 0xa684, + 0xa686, 0xa686, + 0xa688, 0xa688, + 0xa68a, 0xa68a, + 0xa68c, 0xa68c, + 0xa68e, 0xa68e, + 0xa690, 0xa690, + 0xa692, 0xa692, + 0xa694, 0xa694, + 0xa696, 0xa696, + 0xa698, 0xa698, + 0xa69a, 0xa69a, + 0xa722, 0xa722, + 0xa724, 0xa724, + 0xa726, 0xa726, + 0xa728, 0xa728, + 0xa72a, 0xa72a, + 0xa72c, 0xa72c, + 0xa72e, 0xa72e, + 0xa732, 0xa732, + 0xa734, 0xa734, + 0xa736, 0xa736, + 0xa738, 0xa738, + 0xa73a, 0xa73a, + 0xa73c, 0xa73c, + 0xa73e, 0xa73e, + 0xa740, 0xa740, + 0xa742, 0xa742, + 0xa744, 0xa744, + 0xa746, 0xa746, + 0xa748, 0xa748, + 0xa74a, 0xa74a, + 0xa74c, 0xa74c, + 0xa74e, 0xa74e, + 0xa750, 0xa750, + 0xa752, 0xa752, + 0xa754, 0xa754, + 0xa756, 0xa756, + 0xa758, 0xa758, + 0xa75a, 0xa75a, + 0xa75c, 0xa75c, + 0xa75e, 0xa75e, + 0xa760, 0xa760, + 0xa762, 0xa762, + 0xa764, 0xa764, + 0xa766, 0xa766, + 0xa768, 0xa768, + 0xa76a, 0xa76a, + 0xa76c, 0xa76c, + 0xa76e, 0xa76e, + 0xa779, 0xa779, + 0xa77b, 0xa77b, + 0xa77d, 0xa77e, + 0xa780, 0xa780, + 0xa782, 0xa782, + 0xa784, 0xa784, + 0xa786, 0xa786, + 0xa78b, 0xa78b, + 0xa78d, 0xa78d, + 0xa790, 0xa790, + 0xa792, 0xa792, + 0xa796, 0xa796, + 0xa798, 0xa798, + 0xa79a, 0xa79a, + 0xa79c, 0xa79c, + 0xa79e, 0xa79e, + 0xa7a0, 0xa7a0, + 0xa7a2, 0xa7a2, + 0xa7a4, 0xa7a4, + 0xa7a6, 0xa7a6, + 0xa7a8, 0xa7a8, + 0xa7aa, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xff21, 0xff3a, + 0x10400, 0x10427, + 0x118a0, 0x118bf, + 0x1d400, 0x1d419, + 0x1d434, 0x1d44d, + 0x1d468, 0x1d481, + 0x1d49c, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b5, + 0x1d4d0, 0x1d4e9, + 0x1d504, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d538, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d56c, 0x1d585, + 0x1d5a0, 0x1d5b9, + 0x1d5d4, 0x1d5ed, + 0x1d608, 0x1d621, + 0x1d63c, 0x1d655, + 0x1d670, 0x1d689, + 0x1d6a8, 0x1d6c0, + 0x1d6e2, 0x1d6fa, + 0x1d71c, 0x1d734, + 0x1d756, 0x1d76e, + 0x1d790, 0x1d7a8, + 0x1d7ca, 0x1d7ca +}; +UCP_FN(Lu) + +static const unichar ucp_M_def[] = { + 0x300, 0x36f, + 0x483, 0x489, + 0x591, 0x5bd, + 0x5bf, 0x5bf, + 0x5c1, 0x5c2, + 0x5c4, 0x5c5, + 0x5c7, 0x5c7, + 0x610, 0x61a, + 0x64b, 0x65f, + 0x670, 0x670, + 0x6d6, 0x6dc, + 0x6df, 0x6e4, + 0x6e7, 0x6e8, + 0x6ea, 0x6ed, + 0x711, 0x711, + 0x730, 0x74a, + 0x7a6, 0x7b0, + 0x7eb, 0x7f3, + 0x816, 0x819, + 0x81b, 0x823, + 0x825, 0x827, + 0x829, 0x82d, + 0x859, 0x85b, + 0x8e4, 0x903, + 0x93a, 0x93c, + 0x93e, 0x94f, + 0x951, 0x957, + 0x962, 0x963, + 0x981, 0x983, + 0x9bc, 0x9bc, + 0x9be, 0x9c4, + 0x9c7, 0x9c8, + 0x9cb, 0x9cd, + 0x9d7, 0x9d7, + 0x9e2, 0x9e3, + 0xa01, 0xa03, + 0xa3c, 0xa3c, + 0xa3e, 0xa42, + 0xa47, 0xa48, + 0xa4b, 0xa4d, + 0xa51, 0xa51, + 0xa70, 0xa71, + 0xa75, 0xa75, + 0xa81, 0xa83, + 0xabc, 0xabc, + 0xabe, 0xac5, + 0xac7, 0xac9, + 0xacb, 0xacd, + 0xae2, 0xae3, + 0xb01, 0xb03, + 0xb3c, 0xb3c, + 0xb3e, 0xb44, + 0xb47, 0xb48, + 0xb4b, 0xb4d, + 0xb56, 0xb57, + 0xb62, 0xb63, + 0xb82, 0xb82, + 0xbbe, 0xbc2, + 0xbc6, 0xbc8, + 0xbca, 0xbcd, + 0xbd7, 0xbd7, + 0xc00, 0xc03, + 0xc3e, 0xc44, + 0xc46, 0xc48, + 0xc4a, 0xc4d, + 0xc55, 0xc56, + 0xc62, 0xc63, + 0xc81, 0xc83, + 0xcbc, 0xcbc, + 0xcbe, 0xcc4, + 0xcc6, 0xcc8, + 0xcca, 0xccd, + 0xcd5, 0xcd6, + 0xce2, 0xce3, + 0xd01, 0xd03, + 0xd3e, 0xd44, + 0xd46, 0xd48, + 0xd4a, 0xd4d, + 0xd57, 0xd57, + 0xd62, 0xd63, + 0xd82, 0xd83, + 0xdca, 0xdca, + 0xdcf, 0xdd4, + 0xdd6, 0xdd6, + 0xdd8, 0xddf, + 0xdf2, 0xdf3, + 0xe31, 0xe31, + 0xe34, 0xe3a, + 0xe47, 0xe4e, + 0xeb1, 0xeb1, + 0xeb4, 0xeb9, + 0xebb, 0xebc, + 0xec8, 0xecd, + 0xf18, 0xf19, + 0xf35, 0xf35, + 0xf37, 0xf37, + 0xf39, 0xf39, + 0xf3e, 0xf3f, + 0xf71, 0xf84, + 0xf86, 0xf87, + 0xf8d, 0xf97, + 0xf99, 0xfbc, + 0xfc6, 0xfc6, + 0x102b, 0x103e, + 0x1056, 0x1059, + 0x105e, 0x1060, + 0x1062, 0x1064, + 0x1067, 0x106d, + 0x1071, 0x1074, + 0x1082, 0x108d, + 0x108f, 0x108f, + 0x109a, 0x109d, + 0x135d, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b4, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a17, 0x1a1b, + 0x1a55, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1ab0, 0x1abe, + 0x1b00, 0x1b04, + 0x1b34, 0x1b44, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b82, + 0x1ba1, 0x1bad, + 0x1be6, 0x1bf3, + 0x1c24, 0x1c37, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce8, + 0x1ced, 0x1ced, + 0x1cf2, 0x1cf4, + 0x1cf8, 0x1cf9, + 0x1dc0, 0x1df5, + 0x1dfc, 0x1dff, + 0x20d0, 0x20f0, + 0x2cef, 0x2cf1, + 0x2d7f, 0x2d7f, + 0x2de0, 0x2dff, + 0x302a, 0x302f, + 0x3099, 0x309a, + 0xa66f, 0xa672, + 0xa674, 0xa67d, + 0xa69f, 0xa69f, + 0xa6f0, 0xa6f1, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa823, 0xa827, + 0xa880, 0xa881, + 0xa8b4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa953, + 0xa980, 0xa983, + 0xa9b3, 0xa9c0, + 0xa9e5, 0xa9e5, + 0xaa29, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4d, + 0xaa7b, 0xaa7d, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xaaeb, 0xaaef, + 0xaaf5, 0xaaf6, + 0xabe3, 0xabea, + 0xabec, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe2d, + 0x101fd, 0x101fd, + 0x102e0, 0x102e0, + 0x10376, 0x1037a, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x10ae5, 0x10ae6, + 0x11000, 0x11002, + 0x11038, 0x11046, + 0x1107f, 0x11082, + 0x110b0, 0x110ba, + 0x11100, 0x11102, + 0x11127, 0x11134, + 0x11173, 0x11173, + 0x11180, 0x11182, + 0x111b3, 0x111c0, + 0x1122c, 0x11237, + 0x112df, 0x112ea, + 0x11301, 0x11303, + 0x1133c, 0x1133c, + 0x1133e, 0x11344, + 0x11347, 0x11348, + 0x1134b, 0x1134d, + 0x11357, 0x11357, + 0x11362, 0x11363, + 0x11366, 0x1136c, + 0x11370, 0x11374, + 0x114b0, 0x114c3, + 0x115af, 0x115b5, + 0x115b8, 0x115c0, + 0x11630, 0x11640, + 0x116ab, 0x116b7, + 0x16af0, 0x16af4, + 0x16b30, 0x16b36, + 0x16f51, 0x16f7e, + 0x16f8f, 0x16f92, + 0x1bc9d, 0x1bc9e, + 0x1d165, 0x1d169, + 0x1d16d, 0x1d172, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1e8d0, 0x1e8d6, + 0xe0100, 0xe01ef +}; +UCP_FN(M) + +static const unichar ucp_Mc_def[] = { + 0x903, 0x903, + 0x93b, 0x93b, + 0x93e, 0x940, + 0x949, 0x94c, + 0x94e, 0x94f, + 0x982, 0x983, + 0x9be, 0x9c0, + 0x9c7, 0x9c8, + 0x9cb, 0x9cc, + 0x9d7, 0x9d7, + 0xa03, 0xa03, + 0xa3e, 0xa40, + 0xa83, 0xa83, + 0xabe, 0xac0, + 0xac9, 0xac9, + 0xacb, 0xacc, + 0xb02, 0xb03, + 0xb3e, 0xb3e, + 0xb40, 0xb40, + 0xb47, 0xb48, + 0xb4b, 0xb4c, + 0xb57, 0xb57, + 0xbbe, 0xbbf, + 0xbc1, 0xbc2, + 0xbc6, 0xbc8, + 0xbca, 0xbcc, + 0xbd7, 0xbd7, + 0xc01, 0xc03, + 0xc41, 0xc44, + 0xc82, 0xc83, + 0xcbe, 0xcbe, + 0xcc0, 0xcc4, + 0xcc7, 0xcc8, + 0xcca, 0xccb, + 0xcd5, 0xcd6, + 0xd02, 0xd03, + 0xd3e, 0xd40, + 0xd46, 0xd48, + 0xd4a, 0xd4c, + 0xd57, 0xd57, + 0xd82, 0xd83, + 0xdcf, 0xdd1, + 0xdd8, 0xddf, + 0xdf2, 0xdf3, + 0xf3e, 0xf3f, + 0xf7f, 0xf7f, + 0x102b, 0x102c, + 0x1031, 0x1031, + 0x1038, 0x1038, + 0x103b, 0x103c, + 0x1056, 0x1057, + 0x1062, 0x1064, + 0x1067, 0x106d, + 0x1083, 0x1084, + 0x1087, 0x108c, + 0x108f, 0x108f, + 0x109a, 0x109c, + 0x17b6, 0x17b6, + 0x17be, 0x17c5, + 0x17c7, 0x17c8, + 0x1923, 0x1926, + 0x1929, 0x192b, + 0x1930, 0x1931, + 0x1933, 0x1938, + 0x19b0, 0x19c0, + 0x19c8, 0x19c9, + 0x1a19, 0x1a1a, + 0x1a55, 0x1a55, + 0x1a57, 0x1a57, + 0x1a61, 0x1a61, + 0x1a63, 0x1a64, + 0x1a6d, 0x1a72, + 0x1b04, 0x1b04, + 0x1b35, 0x1b35, + 0x1b3b, 0x1b3b, + 0x1b3d, 0x1b41, + 0x1b43, 0x1b44, + 0x1b82, 0x1b82, + 0x1ba1, 0x1ba1, + 0x1ba6, 0x1ba7, + 0x1baa, 0x1baa, + 0x1be7, 0x1be7, + 0x1bea, 0x1bec, + 0x1bee, 0x1bee, + 0x1bf2, 0x1bf3, + 0x1c24, 0x1c2b, + 0x1c34, 0x1c35, + 0x1ce1, 0x1ce1, + 0x1cf2, 0x1cf3, + 0x302e, 0x302f, + 0xa823, 0xa824, + 0xa827, 0xa827, + 0xa880, 0xa881, + 0xa8b4, 0xa8c3, + 0xa952, 0xa953, + 0xa983, 0xa983, + 0xa9b4, 0xa9b5, + 0xa9ba, 0xa9bb, + 0xa9bd, 0xa9c0, + 0xaa2f, 0xaa30, + 0xaa33, 0xaa34, + 0xaa4d, 0xaa4d, + 0xaa7b, 0xaa7b, + 0xaa7d, 0xaa7d, + 0xaaeb, 0xaaeb, + 0xaaee, 0xaaef, + 0xaaf5, 0xaaf5, + 0xabe3, 0xabe4, + 0xabe6, 0xabe7, + 0xabe9, 0xabea, + 0xabec, 0xabec, + 0x11000, 0x11000, + 0x11002, 0x11002, + 0x11082, 0x11082, + 0x110b0, 0x110b2, + 0x110b7, 0x110b8, + 0x1112c, 0x1112c, + 0x11182, 0x11182, + 0x111b3, 0x111b5, + 0x111bf, 0x111c0, + 0x1122c, 0x1122e, + 0x11232, 0x11233, + 0x11235, 0x11235, + 0x112e0, 0x112e2, + 0x11302, 0x11303, + 0x1133e, 0x1133f, + 0x11341, 0x11344, + 0x11347, 0x11348, + 0x1134b, 0x1134d, + 0x11357, 0x11357, + 0x11362, 0x11363, + 0x114b0, 0x114b2, + 0x114b9, 0x114b9, + 0x114bb, 0x114be, + 0x114c1, 0x114c1, + 0x115af, 0x115b1, + 0x115b8, 0x115bb, + 0x115be, 0x115be, + 0x11630, 0x11632, + 0x1163b, 0x1163c, + 0x1163e, 0x1163e, + 0x116ac, 0x116ac, + 0x116ae, 0x116af, + 0x116b6, 0x116b6, + 0x16f51, 0x16f7e, + 0x1d165, 0x1d166, + 0x1d16d, 0x1d172 +}; +UCP_FN(Mc) + +static const unichar ucp_Me_def[] = { + 0x488, 0x489, + 0x1abe, 0x1abe, + 0x20dd, 0x20e0, + 0x20e2, 0x20e4, + 0xa670, 0xa672 +}; +UCP_FN(Me) + +static const unichar ucp_Mn_def[] = { + 0x300, 0x36f, + 0x483, 0x487, + 0x591, 0x5bd, + 0x5bf, 0x5bf, + 0x5c1, 0x5c2, + 0x5c4, 0x5c5, + 0x5c7, 0x5c7, + 0x610, 0x61a, + 0x64b, 0x65f, + 0x670, 0x670, + 0x6d6, 0x6dc, + 0x6df, 0x6e4, + 0x6e7, 0x6e8, + 0x6ea, 0x6ed, + 0x711, 0x711, + 0x730, 0x74a, + 0x7a6, 0x7b0, + 0x7eb, 0x7f3, + 0x816, 0x819, + 0x81b, 0x823, + 0x825, 0x827, + 0x829, 0x82d, + 0x859, 0x85b, + 0x8e4, 0x902, + 0x93a, 0x93a, + 0x93c, 0x93c, + 0x941, 0x948, + 0x94d, 0x94d, + 0x951, 0x957, + 0x962, 0x963, + 0x981, 0x981, + 0x9bc, 0x9bc, + 0x9c1, 0x9c4, + 0x9cd, 0x9cd, + 0x9e2, 0x9e3, + 0xa01, 0xa02, + 0xa3c, 0xa3c, + 0xa41, 0xa42, + 0xa47, 0xa48, + 0xa4b, 0xa4d, + 0xa51, 0xa51, + 0xa70, 0xa71, + 0xa75, 0xa75, + 0xa81, 0xa82, + 0xabc, 0xabc, + 0xac1, 0xac5, + 0xac7, 0xac8, + 0xacd, 0xacd, + 0xae2, 0xae3, + 0xb01, 0xb01, + 0xb3c, 0xb3c, + 0xb3f, 0xb3f, + 0xb41, 0xb44, + 0xb4d, 0xb4d, + 0xb56, 0xb56, + 0xb62, 0xb63, + 0xb82, 0xb82, + 0xbc0, 0xbc0, + 0xbcd, 0xbcd, + 0xc00, 0xc00, + 0xc3e, 0xc40, + 0xc46, 0xc48, + 0xc4a, 0xc4d, + 0xc55, 0xc56, + 0xc62, 0xc63, + 0xc81, 0xc81, + 0xcbc, 0xcbc, + 0xcbf, 0xcbf, + 0xcc6, 0xcc6, + 0xccc, 0xccd, + 0xce2, 0xce3, + 0xd01, 0xd01, + 0xd41, 0xd44, + 0xd4d, 0xd4d, + 0xd62, 0xd63, + 0xdca, 0xdca, + 0xdd2, 0xdd4, + 0xdd6, 0xdd6, + 0xe31, 0xe31, + 0xe34, 0xe3a, + 0xe47, 0xe4e, + 0xeb1, 0xeb1, + 0xeb4, 0xeb9, + 0xebb, 0xebc, + 0xec8, 0xecd, + 0xf18, 0xf19, + 0xf35, 0xf35, + 0xf37, 0xf37, + 0xf39, 0xf39, + 0xf71, 0xf7e, + 0xf80, 0xf84, + 0xf86, 0xf87, + 0xf8d, 0xf97, + 0xf99, 0xfbc, + 0xfc6, 0xfc6, + 0x102d, 0x1030, + 0x1032, 0x1037, + 0x1039, 0x103a, + 0x103d, 0x103e, + 0x1058, 0x1059, + 0x105e, 0x1060, + 0x1071, 0x1074, + 0x1082, 0x1082, + 0x1085, 0x1086, + 0x108d, 0x108d, + 0x109d, 0x109d, + 0x135d, 0x135f, + 0x1712, 0x1714, + 0x1732, 0x1734, + 0x1752, 0x1753, + 0x1772, 0x1773, + 0x17b4, 0x17b5, + 0x17b7, 0x17bd, + 0x17c6, 0x17c6, + 0x17c9, 0x17d3, + 0x17dd, 0x17dd, + 0x180b, 0x180d, + 0x18a9, 0x18a9, + 0x1920, 0x1922, + 0x1927, 0x1928, + 0x1932, 0x1932, + 0x1939, 0x193b, + 0x1a17, 0x1a18, + 0x1a1b, 0x1a1b, + 0x1a56, 0x1a56, + 0x1a58, 0x1a5e, + 0x1a60, 0x1a60, + 0x1a62, 0x1a62, + 0x1a65, 0x1a6c, + 0x1a73, 0x1a7c, + 0x1a7f, 0x1a7f, + 0x1ab0, 0x1abd, + 0x1b00, 0x1b03, + 0x1b34, 0x1b34, + 0x1b36, 0x1b3a, + 0x1b3c, 0x1b3c, + 0x1b42, 0x1b42, + 0x1b6b, 0x1b73, + 0x1b80, 0x1b81, + 0x1ba2, 0x1ba5, + 0x1ba8, 0x1ba9, + 0x1bab, 0x1bad, + 0x1be6, 0x1be6, + 0x1be8, 0x1be9, + 0x1bed, 0x1bed, + 0x1bef, 0x1bf1, + 0x1c2c, 0x1c33, + 0x1c36, 0x1c37, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1cf4, 0x1cf4, + 0x1cf8, 0x1cf9, + 0x1dc0, 0x1df5, + 0x1dfc, 0x1dff, + 0x20d0, 0x20dc, + 0x20e1, 0x20e1, + 0x20e5, 0x20f0, + 0x2cef, 0x2cf1, + 0x2d7f, 0x2d7f, + 0x2de0, 0x2dff, + 0x302a, 0x302d, + 0x3099, 0x309a, + 0xa66f, 0xa66f, + 0xa674, 0xa67d, + 0xa69f, 0xa69f, + 0xa6f0, 0xa6f1, + 0xa802, 0xa802, + 0xa806, 0xa806, + 0xa80b, 0xa80b, + 0xa825, 0xa826, + 0xa8c4, 0xa8c4, + 0xa8e0, 0xa8f1, + 0xa926, 0xa92d, + 0xa947, 0xa951, + 0xa980, 0xa982, + 0xa9b3, 0xa9b3, + 0xa9b6, 0xa9b9, + 0xa9bc, 0xa9bc, + 0xa9e5, 0xa9e5, + 0xaa29, 0xaa2e, + 0xaa31, 0xaa32, + 0xaa35, 0xaa36, + 0xaa43, 0xaa43, + 0xaa4c, 0xaa4c, + 0xaa7c, 0xaa7c, + 0xaab0, 0xaab0, + 0xaab2, 0xaab4, + 0xaab7, 0xaab8, + 0xaabe, 0xaabf, + 0xaac1, 0xaac1, + 0xaaec, 0xaaed, + 0xaaf6, 0xaaf6, + 0xabe5, 0xabe5, + 0xabe8, 0xabe8, + 0xabed, 0xabed, + 0xfb1e, 0xfb1e, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe2d, + 0x101fd, 0x101fd, + 0x102e0, 0x102e0, + 0x10376, 0x1037a, + 0x10a01, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a0f, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a3f, + 0x10ae5, 0x10ae6, + 0x11001, 0x11001, + 0x11038, 0x11046, + 0x1107f, 0x11081, + 0x110b3, 0x110b6, + 0x110b9, 0x110ba, + 0x11100, 0x11102, + 0x11127, 0x1112b, + 0x1112d, 0x11134, + 0x11173, 0x11173, + 0x11180, 0x11181, + 0x111b6, 0x111be, + 0x1122f, 0x11231, + 0x11234, 0x11234, + 0x11236, 0x11237, + 0x112df, 0x112df, + 0x112e3, 0x112ea, + 0x11301, 0x11301, + 0x1133c, 0x1133c, + 0x11340, 0x11340, + 0x11366, 0x1136c, + 0x11370, 0x11374, + 0x114b3, 0x114b8, + 0x114ba, 0x114ba, + 0x114bf, 0x114c0, + 0x114c2, 0x114c3, + 0x115b2, 0x115b5, + 0x115bc, 0x115bd, + 0x115bf, 0x115c0, + 0x11633, 0x1163a, + 0x1163d, 0x1163d, + 0x1163f, 0x11640, + 0x116ab, 0x116ab, + 0x116ad, 0x116ad, + 0x116b0, 0x116b5, + 0x116b7, 0x116b7, + 0x16af0, 0x16af4, + 0x16b30, 0x16b36, + 0x16f8f, 0x16f92, + 0x1bc9d, 0x1bc9e, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0x1d242, 0x1d244, + 0x1e8d0, 0x1e8d6, + 0xe0100, 0xe01ef +}; +UCP_FN(Mn) + +static const unichar ucp_N_def[] = { + 0x30, 0x39, + 0xb2, 0xb3, + 0xb9, 0xb9, + 0xbc, 0xbe, + 0x660, 0x669, + 0x6f0, 0x6f9, + 0x7c0, 0x7c9, + 0x966, 0x96f, + 0x9e6, 0x9ef, + 0x9f4, 0x9f9, + 0xa66, 0xa6f, + 0xae6, 0xaef, + 0xb66, 0xb6f, + 0xb72, 0xb77, + 0xbe6, 0xbf2, + 0xc66, 0xc6f, + 0xc78, 0xc7e, + 0xce6, 0xcef, + 0xd66, 0xd75, + 0xde6, 0xdef, + 0xe50, 0xe59, + 0xed0, 0xed9, + 0xf20, 0xf33, + 0x1040, 0x1049, + 0x1090, 0x1099, + 0x1369, 0x137c, + 0x16ee, 0x16f0, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19da, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1b50, 0x1b59, + 0x1bb0, 0x1bb9, + 0x1c40, 0x1c49, + 0x1c50, 0x1c59, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2150, 0x2182, + 0x2185, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3248, 0x324f, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xa620, 0xa629, + 0xa6e6, 0xa6ef, + 0xa830, 0xa835, + 0xa8d0, 0xa8d9, + 0xa900, 0xa909, + 0xa9d0, 0xa9d9, + 0xa9f0, 0xa9f9, + 0xaa50, 0xaa59, + 0xabf0, 0xabf9, + 0xff10, 0xff19, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018b, + 0x102e1, 0x102fb, + 0x10320, 0x10323, + 0x10341, 0x10341, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x104a0, 0x104a9, + 0x10858, 0x1085f, + 0x10879, 0x1087f, + 0x108a7, 0x108af, + 0x10916, 0x1091b, + 0x10a40, 0x10a47, + 0x10a7d, 0x10a7e, + 0x10a9d, 0x10a9f, + 0x10aeb, 0x10aef, + 0x10b58, 0x10b5f, + 0x10b78, 0x10b7f, + 0x10ba9, 0x10baf, + 0x10e60, 0x10e7e, + 0x11052, 0x1106f, + 0x110f0, 0x110f9, + 0x11136, 0x1113f, + 0x111d0, 0x111d9, + 0x111e1, 0x111f4, + 0x112f0, 0x112f9, + 0x114d0, 0x114d9, + 0x11650, 0x11659, + 0x116c0, 0x116c9, + 0x118e0, 0x118f2, + 0x12400, 0x1246e, + 0x16a60, 0x16a69, + 0x16b50, 0x16b59, + 0x16b5b, 0x16b61, + 0x1d360, 0x1d371, + 0x1d7ce, 0x1d7ff, + 0x1e8c7, 0x1e8cf, + 0x1f100, 0x1f10c +}; +UCP_FN(N) + +static const unichar ucp_Nd_def[] = { + 0x30, 0x39, + 0x660, 0x669, + 0x6f0, 0x6f9, + 0x7c0, 0x7c9, + 0x966, 0x96f, + 0x9e6, 0x9ef, + 0xa66, 0xa6f, + 0xae6, 0xaef, + 0xb66, 0xb6f, + 0xbe6, 0xbef, + 0xc66, 0xc6f, + 0xce6, 0xcef, + 0xd66, 0xd6f, + 0xde6, 0xdef, + 0xe50, 0xe59, + 0xed0, 0xed9, + 0xf20, 0xf29, + 0x1040, 0x1049, + 0x1090, 0x1099, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1b50, 0x1b59, + 0x1bb0, 0x1bb9, + 0x1c40, 0x1c49, + 0x1c50, 0x1c59, + 0xa620, 0xa629, + 0xa8d0, 0xa8d9, + 0xa900, 0xa909, + 0xa9d0, 0xa9d9, + 0xa9f0, 0xa9f9, + 0xaa50, 0xaa59, + 0xabf0, 0xabf9, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x11066, 0x1106f, + 0x110f0, 0x110f9, + 0x11136, 0x1113f, + 0x111d0, 0x111d9, + 0x112f0, 0x112f9, + 0x114d0, 0x114d9, + 0x11650, 0x11659, + 0x116c0, 0x116c9, + 0x118e0, 0x118e9, + 0x16a60, 0x16a69, + 0x16b50, 0x16b59, + 0x1d7ce, 0x1d7ff +}; +UCP_FN(Nd) + +static const unichar ucp_Nl_def[] = { + 0x16ee, 0x16f0, + 0x2160, 0x2182, + 0x2185, 0x2188, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303a, + 0xa6e6, 0xa6ef, + 0x10140, 0x10174, + 0x10341, 0x10341, + 0x1034a, 0x1034a, + 0x103d1, 0x103d5, + 0x12400, 0x1246e +}; +UCP_FN(Nl) + +static const unichar ucp_No_def[] = { + 0xb2, 0xb3, + 0xb9, 0xb9, + 0xbc, 0xbe, + 0x9f4, 0x9f9, + 0xb72, 0xb77, + 0xbf0, 0xbf2, + 0xc78, 0xc7e, + 0xd70, 0xd75, + 0xf2a, 0xf33, + 0x1369, 0x137c, + 0x17f0, 0x17f9, + 0x19da, 0x19da, + 0x2070, 0x2070, + 0x2074, 0x2079, + 0x2080, 0x2089, + 0x2150, 0x215f, + 0x2189, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2cfd, 0x2cfd, + 0x3192, 0x3195, + 0x3220, 0x3229, + 0x3248, 0x324f, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0xa830, 0xa835, + 0x10107, 0x10133, + 0x10175, 0x10178, + 0x1018a, 0x1018b, + 0x102e1, 0x102fb, + 0x10320, 0x10323, + 0x10858, 0x1085f, + 0x10879, 0x1087f, + 0x108a7, 0x108af, + 0x10916, 0x1091b, + 0x10a40, 0x10a47, + 0x10a7d, 0x10a7e, + 0x10a9d, 0x10a9f, + 0x10aeb, 0x10aef, + 0x10b58, 0x10b5f, + 0x10b78, 0x10b7f, + 0x10ba9, 0x10baf, + 0x10e60, 0x10e7e, + 0x11052, 0x11065, + 0x111e1, 0x111f4, + 0x118ea, 0x118f2, + 0x16b5b, 0x16b61, + 0x1d360, 0x1d371, + 0x1e8c7, 0x1e8cf, + 0x1f100, 0x1f10c +}; +UCP_FN(No) + +static const unichar ucp_P_def[] = { + 0x21, 0x23, + 0x25, 0x2a, + 0x2c, 0x2f, + 0x3a, 0x3b, + 0x3f, 0x40, + 0x5b, 0x5d, + 0x5f, 0x5f, + 0x7b, 0x7b, + 0x7d, 0x7d, + 0xa1, 0xa1, + 0xa7, 0xa7, + 0xab, 0xab, + 0xb6, 0xb7, + 0xbb, 0xbb, + 0xbf, 0xbf, + 0x37e, 0x37e, + 0x387, 0x387, + 0x55a, 0x55f, + 0x589, 0x58a, + 0x5be, 0x5be, + 0x5c0, 0x5c0, + 0x5c3, 0x5c3, + 0x5c6, 0x5c6, + 0x5f3, 0x5f4, + 0x609, 0x60a, + 0x60c, 0x60d, + 0x61b, 0x61b, + 0x61e, 0x61f, + 0x66a, 0x66d, + 0x6d4, 0x6d4, + 0x700, 0x70d, + 0x7f7, 0x7f9, + 0x830, 0x83e, + 0x85e, 0x85e, + 0x964, 0x965, + 0x970, 0x970, + 0xaf0, 0xaf0, + 0xdf4, 0xdf4, + 0xe4f, 0xe4f, + 0xe5a, 0xe5b, + 0xf04, 0xf12, + 0xf14, 0xf14, + 0xf3a, 0xf3d, + 0xf85, 0xf85, + 0xfd0, 0xfd4, + 0xfd9, 0xfda, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1360, 0x1368, + 0x1400, 0x1400, + 0x166d, 0x166e, + 0x169b, 0x169c, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x180a, + 0x1944, 0x1945, + 0x1a1e, 0x1a1f, + 0x1aa0, 0x1aa6, + 0x1aa8, 0x1aad, + 0x1b5a, 0x1b60, + 0x1bfc, 0x1bff, + 0x1c3b, 0x1c3f, + 0x1c7e, 0x1c7f, + 0x1cc0, 0x1cc7, + 0x1cd3, 0x1cd3, + 0x2010, 0x2027, + 0x2030, 0x2043, + 0x2045, 0x2051, + 0x2053, 0x205e, + 0x207d, 0x207e, + 0x208d, 0x208e, + 0x2308, 0x230b, + 0x2329, 0x232a, + 0x2768, 0x2775, + 0x27c5, 0x27c6, + 0x27e6, 0x27ef, + 0x2983, 0x2998, + 0x29d8, 0x29db, + 0x29fc, 0x29fd, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2d70, 0x2d70, + 0x2e00, 0x2e2e, + 0x2e30, 0x2e42, + 0x3001, 0x3003, + 0x3008, 0x3011, + 0x3014, 0x301f, + 0x3030, 0x3030, + 0x303d, 0x303d, + 0x30a0, 0x30a0, + 0x30fb, 0x30fb, + 0xa4fe, 0xa4ff, + 0xa60d, 0xa60f, + 0xa673, 0xa673, + 0xa67e, 0xa67e, + 0xa6f2, 0xa6f7, + 0xa874, 0xa877, + 0xa8ce, 0xa8cf, + 0xa8f8, 0xa8fa, + 0xa92e, 0xa92f, + 0xa95f, 0xa95f, + 0xa9c1, 0xa9cd, + 0xa9de, 0xa9df, + 0xaa5c, 0xaa5f, + 0xaade, 0xaadf, + 0xaaf0, 0xaaf1, + 0xabeb, 0xabeb, + 0xfd3e, 0xfd3f, + 0xfe10, 0xfe19, + 0xfe30, 0xfe52, + 0xfe54, 0xfe61, + 0xfe63, 0xfe63, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff0a, + 0xff0c, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3b, 0xff3d, + 0xff3f, 0xff3f, + 0xff5b, 0xff5b, + 0xff5d, 0xff5d, + 0xff5f, 0xff65, + 0x10100, 0x10102, + 0x1039f, 0x1039f, + 0x103d0, 0x103d0, + 0x1056f, 0x1056f, + 0x10857, 0x10857, + 0x1091f, 0x1091f, + 0x1093f, 0x1093f, + 0x10a50, 0x10a58, + 0x10a7f, 0x10a7f, + 0x10af0, 0x10af6, + 0x10b39, 0x10b3f, + 0x10b99, 0x10b9c, + 0x11047, 0x1104d, + 0x110bb, 0x110bc, + 0x110be, 0x110c1, + 0x11140, 0x11143, + 0x11174, 0x11175, + 0x111c5, 0x111c8, + 0x111cd, 0x111cd, + 0x11238, 0x1123d, + 0x114c6, 0x114c6, + 0x115c1, 0x115c9, + 0x11641, 0x11643, + 0x12470, 0x12474, + 0x16a6e, 0x16a6f, + 0x16af5, 0x16af5, + 0x16b37, 0x16b3b, + 0x16b44, 0x16b44, + 0x1bc9f, 0x1bc9f +}; +UCP_FN(P) + +static const unichar ucp_Pc_def[] = { + 0x5f, 0x5f, + 0x203f, 0x2040, + 0x2054, 0x2054, + 0xfe33, 0xfe34, + 0xfe4d, 0xfe4f, + 0xff3f, 0xff3f +}; +UCP_FN(Pc) + +static const unichar ucp_Pd_def[] = { + 0x2d, 0x2d, + 0x58a, 0x58a, + 0x5be, 0x5be, + 0x1400, 0x1400, + 0x1806, 0x1806, + 0x2010, 0x2015, + 0x2e17, 0x2e17, + 0x2e1a, 0x2e1a, + 0x2e3a, 0x2e3b, + 0x2e40, 0x2e40, + 0x301c, 0x301c, + 0x3030, 0x3030, + 0x30a0, 0x30a0, + 0xfe31, 0xfe32, + 0xfe58, 0xfe58, + 0xfe63, 0xfe63, + 0xff0d, 0xff0d +}; +UCP_FN(Pd) + +static const unichar ucp_Pe_def[] = { + 0x29, 0x29, + 0x5d, 0x5d, + 0x7d, 0x7d, + 0xf3b, 0xf3b, + 0xf3d, 0xf3d, + 0x169c, 0x169c, + 0x2046, 0x2046, + 0x207e, 0x207e, + 0x208e, 0x208e, + 0x2309, 0x2309, + 0x230b, 0x230b, + 0x232a, 0x232a, + 0x2769, 0x2769, + 0x276b, 0x276b, + 0x276d, 0x276d, + 0x276f, 0x276f, + 0x2771, 0x2771, + 0x2773, 0x2773, + 0x2775, 0x2775, + 0x27c6, 0x27c6, + 0x27e7, 0x27e7, + 0x27e9, 0x27e9, + 0x27eb, 0x27eb, + 0x27ed, 0x27ed, + 0x27ef, 0x27ef, + 0x2984, 0x2984, + 0x2986, 0x2986, + 0x2988, 0x2988, + 0x298a, 0x298a, + 0x298c, 0x298c, + 0x298e, 0x298e, + 0x2990, 0x2990, + 0x2992, 0x2992, + 0x2994, 0x2994, + 0x2996, 0x2996, + 0x2998, 0x2998, + 0x29d9, 0x29d9, + 0x29db, 0x29db, + 0x29fd, 0x29fd, + 0x2e23, 0x2e23, + 0x2e25, 0x2e25, + 0x2e27, 0x2e27, + 0x2e29, 0x2e29, + 0x3009, 0x3009, + 0x300b, 0x300b, + 0x300d, 0x300d, + 0x300f, 0x300f, + 0x3011, 0x3011, + 0x3015, 0x3015, + 0x3017, 0x3017, + 0x3019, 0x3019, + 0x301b, 0x301b, + 0x301e, 0x301f, + 0xfd3e, 0xfd3e, + 0xfe18, 0xfe18, + 0xfe36, 0xfe36, + 0xfe38, 0xfe38, + 0xfe3a, 0xfe3a, + 0xfe3c, 0xfe3c, + 0xfe3e, 0xfe3e, + 0xfe40, 0xfe40, + 0xfe42, 0xfe42, + 0xfe44, 0xfe44, + 0xfe48, 0xfe48, + 0xfe5a, 0xfe5a, + 0xfe5c, 0xfe5c, + 0xfe5e, 0xfe5e, + 0xff09, 0xff09, + 0xff3d, 0xff3d, + 0xff5d, 0xff5d, + 0xff60, 0xff60, + 0xff63, 0xff63 +}; +UCP_FN(Pe) + +static const unichar ucp_Pf_def[] = { + 0xbb, 0xbb, + 0x2019, 0x2019, + 0x201d, 0x201d, + 0x203a, 0x203a, + 0x2e03, 0x2e03, + 0x2e05, 0x2e05, + 0x2e0a, 0x2e0a, + 0x2e0d, 0x2e0d, + 0x2e1d, 0x2e1d, + 0x2e21, 0x2e21 +}; +UCP_FN(Pf) + +static const unichar ucp_Pi_def[] = { + 0xab, 0xab, + 0x2018, 0x2018, + 0x201b, 0x201c, + 0x201f, 0x201f, + 0x2039, 0x2039, + 0x2e02, 0x2e02, + 0x2e04, 0x2e04, + 0x2e09, 0x2e09, + 0x2e0c, 0x2e0c, + 0x2e1c, 0x2e1c, + 0x2e20, 0x2e20 +}; +UCP_FN(Pi) + +static const unichar ucp_Po_def[] = { + 0x21, 0x23, + 0x25, 0x27, + 0x2a, 0x2a, + 0x2c, 0x2c, + 0x2e, 0x2f, + 0x3a, 0x3b, + 0x3f, 0x40, + 0x5c, 0x5c, + 0xa1, 0xa1, + 0xa7, 0xa7, + 0xb6, 0xb7, + 0xbf, 0xbf, + 0x37e, 0x37e, + 0x387, 0x387, + 0x55a, 0x55f, + 0x589, 0x589, + 0x5c0, 0x5c0, + 0x5c3, 0x5c3, + 0x5c6, 0x5c6, + 0x5f3, 0x5f4, + 0x609, 0x60a, + 0x60c, 0x60d, + 0x61b, 0x61b, + 0x61e, 0x61f, + 0x66a, 0x66d, + 0x6d4, 0x6d4, + 0x700, 0x70d, + 0x7f7, 0x7f9, + 0x830, 0x83e, + 0x85e, 0x85e, + 0x964, 0x965, + 0x970, 0x970, + 0xaf0, 0xaf0, + 0xdf4, 0xdf4, + 0xe4f, 0xe4f, + 0xe5a, 0xe5b, + 0xf04, 0xf12, + 0xf14, 0xf14, + 0xf85, 0xf85, + 0xfd0, 0xfd4, + 0xfd9, 0xfda, + 0x104a, 0x104f, + 0x10fb, 0x10fb, + 0x1360, 0x1368, + 0x166d, 0x166e, + 0x16eb, 0x16ed, + 0x1735, 0x1736, + 0x17d4, 0x17d6, + 0x17d8, 0x17da, + 0x1800, 0x1805, + 0x1807, 0x180a, + 0x1944, 0x1945, + 0x1a1e, 0x1a1f, + 0x1aa0, 0x1aa6, + 0x1aa8, 0x1aad, + 0x1b5a, 0x1b60, + 0x1bfc, 0x1bff, + 0x1c3b, 0x1c3f, + 0x1c7e, 0x1c7f, + 0x1cc0, 0x1cc7, + 0x1cd3, 0x1cd3, + 0x2016, 0x2017, + 0x2020, 0x2027, + 0x2030, 0x2038, + 0x203b, 0x203e, + 0x2041, 0x2043, + 0x2047, 0x2051, + 0x2053, 0x2053, + 0x2055, 0x205e, + 0x2cf9, 0x2cfc, + 0x2cfe, 0x2cff, + 0x2d70, 0x2d70, + 0x2e00, 0x2e01, + 0x2e06, 0x2e08, + 0x2e0b, 0x2e0b, + 0x2e0e, 0x2e16, + 0x2e18, 0x2e19, + 0x2e1b, 0x2e1b, + 0x2e1e, 0x2e1f, + 0x2e2a, 0x2e2e, + 0x2e30, 0x2e39, + 0x2e3c, 0x2e3f, + 0x2e41, 0x2e41, + 0x3001, 0x3003, + 0x303d, 0x303d, + 0x30fb, 0x30fb, + 0xa4fe, 0xa4ff, + 0xa60d, 0xa60f, + 0xa673, 0xa673, + 0xa67e, 0xa67e, + 0xa6f2, 0xa6f7, + 0xa874, 0xa877, + 0xa8ce, 0xa8cf, + 0xa8f8, 0xa8fa, + 0xa92e, 0xa92f, + 0xa95f, 0xa95f, + 0xa9c1, 0xa9cd, + 0xa9de, 0xa9df, + 0xaa5c, 0xaa5f, + 0xaade, 0xaadf, + 0xaaf0, 0xaaf1, + 0xabeb, 0xabeb, + 0xfe10, 0xfe16, + 0xfe19, 0xfe19, + 0xfe30, 0xfe30, + 0xfe45, 0xfe46, + 0xfe49, 0xfe4c, + 0xfe50, 0xfe52, + 0xfe54, 0xfe57, + 0xfe5f, 0xfe61, + 0xfe68, 0xfe68, + 0xfe6a, 0xfe6b, + 0xff01, 0xff03, + 0xff05, 0xff07, + 0xff0a, 0xff0a, + 0xff0c, 0xff0c, + 0xff0e, 0xff0f, + 0xff1a, 0xff1b, + 0xff1f, 0xff20, + 0xff3c, 0xff3c, + 0xff61, 0xff61, + 0xff64, 0xff65, + 0x10100, 0x10102, + 0x1039f, 0x1039f, + 0x103d0, 0x103d0, + 0x1056f, 0x1056f, + 0x10857, 0x10857, + 0x1091f, 0x1091f, + 0x1093f, 0x1093f, + 0x10a50, 0x10a58, + 0x10a7f, 0x10a7f, + 0x10af0, 0x10af6, + 0x10b39, 0x10b3f, + 0x10b99, 0x10b9c, + 0x11047, 0x1104d, + 0x110bb, 0x110bc, + 0x110be, 0x110c1, + 0x11140, 0x11143, + 0x11174, 0x11175, + 0x111c5, 0x111c8, + 0x111cd, 0x111cd, + 0x11238, 0x1123d, + 0x114c6, 0x114c6, + 0x115c1, 0x115c9, + 0x11641, 0x11643, + 0x12470, 0x12474, + 0x16a6e, 0x16a6f, + 0x16af5, 0x16af5, + 0x16b37, 0x16b3b, + 0x16b44, 0x16b44, + 0x1bc9f, 0x1bc9f +}; +UCP_FN(Po) + +static const unichar ucp_Ps_def[] = { + 0x28, 0x28, + 0x5b, 0x5b, + 0x7b, 0x7b, + 0xf3a, 0xf3a, + 0xf3c, 0xf3c, + 0x169b, 0x169b, + 0x201a, 0x201a, + 0x201e, 0x201e, + 0x2045, 0x2045, + 0x207d, 0x207d, + 0x208d, 0x208d, + 0x2308, 0x2308, + 0x230a, 0x230a, + 0x2329, 0x2329, + 0x2768, 0x2768, + 0x276a, 0x276a, + 0x276c, 0x276c, + 0x276e, 0x276e, + 0x2770, 0x2770, + 0x2772, 0x2772, + 0x2774, 0x2774, + 0x27c5, 0x27c5, + 0x27e6, 0x27e6, + 0x27e8, 0x27e8, + 0x27ea, 0x27ea, + 0x27ec, 0x27ec, + 0x27ee, 0x27ee, + 0x2983, 0x2983, + 0x2985, 0x2985, + 0x2987, 0x2987, + 0x2989, 0x2989, + 0x298b, 0x298b, + 0x298d, 0x298d, + 0x298f, 0x298f, + 0x2991, 0x2991, + 0x2993, 0x2993, + 0x2995, 0x2995, + 0x2997, 0x2997, + 0x29d8, 0x29d8, + 0x29da, 0x29da, + 0x29fc, 0x29fc, + 0x2e22, 0x2e22, + 0x2e24, 0x2e24, + 0x2e26, 0x2e26, + 0x2e28, 0x2e28, + 0x2e42, 0x2e42, + 0x3008, 0x3008, + 0x300a, 0x300a, + 0x300c, 0x300c, + 0x300e, 0x300e, + 0x3010, 0x3010, + 0x3014, 0x3014, + 0x3016, 0x3016, + 0x3018, 0x3018, + 0x301a, 0x301a, + 0x301d, 0x301d, + 0xfd3f, 0xfd3f, + 0xfe17, 0xfe17, + 0xfe35, 0xfe35, + 0xfe37, 0xfe37, + 0xfe39, 0xfe39, + 0xfe3b, 0xfe3b, + 0xfe3d, 0xfe3d, + 0xfe3f, 0xfe3f, + 0xfe41, 0xfe41, + 0xfe43, 0xfe43, + 0xfe47, 0xfe47, + 0xfe59, 0xfe59, + 0xfe5b, 0xfe5b, + 0xfe5d, 0xfe5d, + 0xff08, 0xff08, + 0xff3b, 0xff3b, + 0xff5b, 0xff5b, + 0xff5f, 0xff5f, + 0xff62, 0xff62 +}; +UCP_FN(Ps) + +static const unichar ucp_S_def[] = { + 0x24, 0x24, + 0x2b, 0x2b, + 0x3c, 0x3e, + 0x5e, 0x5e, + 0x60, 0x60, + 0x7c, 0x7c, + 0x7e, 0x7e, + 0xa2, 0xa6, + 0xa8, 0xa9, + 0xac, 0xac, + 0xae, 0xb1, + 0xb4, 0xb4, + 0xb8, 0xb8, + 0xd7, 0xd7, + 0xf7, 0xf7, + 0x2c2, 0x2c5, + 0x2d2, 0x2df, + 0x2e5, 0x2eb, + 0x2ed, 0x2ed, + 0x2ef, 0x2ff, + 0x375, 0x375, + 0x384, 0x385, + 0x3f6, 0x3f6, + 0x482, 0x482, + 0x58d, 0x58f, + 0x606, 0x608, + 0x60b, 0x60b, + 0x60e, 0x60f, + 0x6de, 0x6de, + 0x6e9, 0x6e9, + 0x6fd, 0x6fe, + 0x7f6, 0x7f6, + 0x9f2, 0x9f3, + 0x9fa, 0x9fb, + 0xaf1, 0xaf1, + 0xb70, 0xb70, + 0xbf3, 0xbfa, + 0xc7f, 0xc7f, + 0xd79, 0xd79, + 0xe3f, 0xe3f, + 0xf01, 0xf03, + 0xf13, 0xf13, + 0xf15, 0xf17, + 0xf1a, 0xf1f, + 0xf34, 0xf34, + 0xf36, 0xf36, + 0xf38, 0xf38, + 0xfbe, 0xfc5, + 0xfc7, 0xfcc, + 0xfce, 0xfcf, + 0xfd5, 0xfd8, + 0x109e, 0x109f, + 0x1390, 0x1399, + 0x17db, 0x17db, + 0x1940, 0x1940, + 0x19de, 0x19ff, + 0x1b61, 0x1b6a, + 0x1b74, 0x1b7c, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x20a0, 0x20bd, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2118, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x213a, 0x213b, + 0x2140, 0x2144, + 0x214a, 0x214d, + 0x214f, 0x214f, + 0x2190, 0x2307, + 0x230c, 0x2328, + 0x232b, 0x23fa, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x2767, + 0x2794, 0x27c4, + 0x27c7, 0x27e5, + 0x27f0, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2b73, + 0x2b76, 0x2b95, + 0x2b98, 0x2bb9, + 0x2bbd, 0x2bc8, + 0x2bca, 0x2bd1, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x309b, 0x309c, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31e3, + 0x3200, 0x321e, + 0x322a, 0x3247, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa700, 0xa716, + 0xa720, 0xa721, + 0xa789, 0xa78a, + 0xa828, 0xa82b, + 0xa836, 0xa839, + 0xaa77, 0xaa79, + 0xab5b, 0xab5b, + 0xfb29, 0xfb29, + 0xfbb2, 0xfbc1, + 0xfdfc, 0xfdfd, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe0, 0xffe6, + 0xffe8, 0xffee, + 0xfffc, 0xfffd, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x1018c, 0x1018c, + 0x10190, 0x1019b, + 0x101a0, 0x101a0, + 0x101d0, 0x101fc, + 0x10877, 0x10878, + 0x10ac8, 0x10ac8, + 0x16b3c, 0x16b3f, + 0x16b45, 0x16b45, + 0x1bc9c, 0x1bc9c, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3, + 0x1eef0, 0x1eef1, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f0a0, 0x1f0ae, + 0x1f0b1, 0x1f0bf, + 0x1f0c1, 0x1f0cf, + 0x1f0d1, 0x1f0f5, + 0x1f110, 0x1f12e, + 0x1f130, 0x1f16b, + 0x1f170, 0x1f19a, + 0x1f1e6, 0x1f202, + 0x1f210, 0x1f23a, + 0x1f240, 0x1f248, + 0x1f250, 0x1f251, + 0x1f300, 0x1f32c, + 0x1f330, 0x1f37d, + 0x1f380, 0x1f3ce, + 0x1f3d4, 0x1f3f7, + 0x1f400, 0x1f4fe, + 0x1f500, 0x1f54a, + 0x1f550, 0x1f579, + 0x1f57b, 0x1f5a3, + 0x1f5a5, 0x1f642, + 0x1f645, 0x1f6cf, + 0x1f6e0, 0x1f6ec, + 0x1f6f0, 0x1f6f3, + 0x1f700, 0x1f773, + 0x1f780, 0x1f7d4, + 0x1f800, 0x1f80b, + 0x1f810, 0x1f847, + 0x1f850, 0x1f859, + 0x1f860, 0x1f887, + 0x1f890, 0x1f8ad +}; +UCP_FN(S) + +static const unichar ucp_Sc_def[] = { + 0x24, 0x24, + 0xa2, 0xa5, + 0x58f, 0x58f, + 0x60b, 0x60b, + 0x9f2, 0x9f3, + 0x9fb, 0x9fb, + 0xaf1, 0xaf1, + 0xbf9, 0xbf9, + 0xe3f, 0xe3f, + 0x17db, 0x17db, + 0x20a0, 0x20bd, + 0xa838, 0xa838, + 0xfdfc, 0xfdfc, + 0xfe69, 0xfe69, + 0xff04, 0xff04, + 0xffe0, 0xffe1, + 0xffe5, 0xffe6 +}; +UCP_FN(Sc) + +static const unichar ucp_Sk_def[] = { + 0x5e, 0x5e, + 0x60, 0x60, + 0xa8, 0xa8, + 0xaf, 0xaf, + 0xb4, 0xb4, + 0xb8, 0xb8, + 0x2c2, 0x2c5, + 0x2d2, 0x2df, + 0x2e5, 0x2eb, + 0x2ed, 0x2ed, + 0x2ef, 0x2ff, + 0x375, 0x375, + 0x384, 0x385, + 0x1fbd, 0x1fbd, + 0x1fbf, 0x1fc1, + 0x1fcd, 0x1fcf, + 0x1fdd, 0x1fdf, + 0x1fed, 0x1fef, + 0x1ffd, 0x1ffe, + 0x309b, 0x309c, + 0xa700, 0xa716, + 0xa720, 0xa721, + 0xa789, 0xa78a, + 0xab5b, 0xab5b, + 0xfbb2, 0xfbc1, + 0xff3e, 0xff3e, + 0xff40, 0xff40, + 0xffe3, 0xffe3 +}; +UCP_FN(Sk) + +static const unichar ucp_Sm_def[] = { + 0x2b, 0x2b, + 0x3c, 0x3e, + 0x7c, 0x7c, + 0x7e, 0x7e, + 0xac, 0xac, + 0xb1, 0xb1, + 0xd7, 0xd7, + 0xf7, 0xf7, + 0x3f6, 0x3f6, + 0x606, 0x608, + 0x2044, 0x2044, + 0x2052, 0x2052, + 0x207a, 0x207c, + 0x208a, 0x208c, + 0x2118, 0x2118, + 0x2140, 0x2144, + 0x214b, 0x214b, + 0x2190, 0x2194, + 0x219a, 0x219b, + 0x21a0, 0x21a0, + 0x21a3, 0x21a3, + 0x21a6, 0x21a6, + 0x21ae, 0x21ae, + 0x21ce, 0x21cf, + 0x21d2, 0x21d2, + 0x21d4, 0x21d4, + 0x21f4, 0x22ff, + 0x2320, 0x2321, + 0x237c, 0x237c, + 0x239b, 0x23b3, + 0x23dc, 0x23e1, + 0x25b7, 0x25b7, + 0x25c1, 0x25c1, + 0x25f8, 0x25ff, + 0x266f, 0x266f, + 0x27c0, 0x27c4, + 0x27c7, 0x27e5, + 0x27f0, 0x27ff, + 0x2900, 0x2982, + 0x2999, 0x29d7, + 0x29dc, 0x29fb, + 0x29fe, 0x2aff, + 0x2b30, 0x2b44, + 0x2b47, 0x2b4c, + 0xfb29, 0xfb29, + 0xfe62, 0xfe62, + 0xfe64, 0xfe66, + 0xff0b, 0xff0b, + 0xff1c, 0xff1e, + 0xff5c, 0xff5c, + 0xff5e, 0xff5e, + 0xffe2, 0xffe2, + 0xffe9, 0xffec, + 0x1d6c1, 0x1d6c1, + 0x1d6db, 0x1d6db, + 0x1d6fb, 0x1d6fb, + 0x1d715, 0x1d715, + 0x1d735, 0x1d735, + 0x1d74f, 0x1d74f, + 0x1d76f, 0x1d76f, + 0x1d789, 0x1d789, + 0x1d7a9, 0x1d7a9, + 0x1d7c3, 0x1d7c3, + 0x1eef0, 0x1eef1 +}; +UCP_FN(Sm) + +static const unichar ucp_So_def[] = { + 0xa6, 0xa6, + 0xa9, 0xa9, + 0xae, 0xae, + 0xb0, 0xb0, + 0x482, 0x482, + 0x58d, 0x58e, + 0x60e, 0x60f, + 0x6de, 0x6de, + 0x6e9, 0x6e9, + 0x6fd, 0x6fe, + 0x7f6, 0x7f6, + 0x9fa, 0x9fa, + 0xb70, 0xb70, + 0xbf3, 0xbf8, + 0xbfa, 0xbfa, + 0xc7f, 0xc7f, + 0xd79, 0xd79, + 0xf01, 0xf03, + 0xf13, 0xf13, + 0xf15, 0xf17, + 0xf1a, 0xf1f, + 0xf34, 0xf34, + 0xf36, 0xf36, + 0xf38, 0xf38, + 0xfbe, 0xfc5, + 0xfc7, 0xfcc, + 0xfce, 0xfcf, + 0xfd5, 0xfd8, + 0x109e, 0x109f, + 0x1390, 0x1399, + 0x1940, 0x1940, + 0x19de, 0x19ff, + 0x1b61, 0x1b6a, + 0x1b74, 0x1b7c, + 0x2100, 0x2101, + 0x2103, 0x2106, + 0x2108, 0x2109, + 0x2114, 0x2114, + 0x2116, 0x2117, + 0x211e, 0x2123, + 0x2125, 0x2125, + 0x2127, 0x2127, + 0x2129, 0x2129, + 0x212e, 0x212e, + 0x213a, 0x213b, + 0x214a, 0x214a, + 0x214c, 0x214d, + 0x214f, 0x214f, + 0x2195, 0x2199, + 0x219c, 0x219f, + 0x21a1, 0x21a2, + 0x21a4, 0x21a5, + 0x21a7, 0x21ad, + 0x21af, 0x21cd, + 0x21d0, 0x21d1, + 0x21d3, 0x21d3, + 0x21d5, 0x21f3, + 0x2300, 0x2307, + 0x230c, 0x231f, + 0x2322, 0x2328, + 0x232b, 0x237b, + 0x237d, 0x239a, + 0x23b4, 0x23db, + 0x23e2, 0x23fa, + 0x2400, 0x2426, + 0x2440, 0x244a, + 0x249c, 0x24e9, + 0x2500, 0x25b6, + 0x25b8, 0x25c0, + 0x25c2, 0x25f7, + 0x2600, 0x266e, + 0x2670, 0x2767, + 0x2794, 0x27bf, + 0x2800, 0x28ff, + 0x2b00, 0x2b2f, + 0x2b45, 0x2b46, + 0x2b4d, 0x2b73, + 0x2b76, 0x2b95, + 0x2b98, 0x2bb9, + 0x2bbd, 0x2bc8, + 0x2bca, 0x2bd1, + 0x2ce5, 0x2cea, + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x2ff0, 0x2ffb, + 0x3004, 0x3004, + 0x3012, 0x3013, + 0x3020, 0x3020, + 0x3036, 0x3037, + 0x303e, 0x303f, + 0x3190, 0x3191, + 0x3196, 0x319f, + 0x31c0, 0x31e3, + 0x3200, 0x321e, + 0x322a, 0x3247, + 0x3250, 0x3250, + 0x3260, 0x327f, + 0x328a, 0x32b0, + 0x32c0, 0x32fe, + 0x3300, 0x33ff, + 0x4dc0, 0x4dff, + 0xa490, 0xa4c6, + 0xa828, 0xa82b, + 0xa836, 0xa837, + 0xa839, 0xa839, + 0xaa77, 0xaa79, + 0xfdfd, 0xfdfd, + 0xffe4, 0xffe4, + 0xffe8, 0xffe8, + 0xffed, 0xffee, + 0xfffc, 0xfffd, + 0x10137, 0x1013f, + 0x10179, 0x10189, + 0x1018c, 0x1018c, + 0x10190, 0x1019b, + 0x101a0, 0x101a0, + 0x101d0, 0x101fc, + 0x10877, 0x10878, + 0x10ac8, 0x10ac8, + 0x16b3c, 0x16b3f, + 0x16b45, 0x16b45, + 0x1bc9c, 0x1bc9c, + 0x1d000, 0x1d0f5, + 0x1d100, 0x1d126, + 0x1d129, 0x1d164, + 0x1d16a, 0x1d16c, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1dd, + 0x1d200, 0x1d241, + 0x1d245, 0x1d245, + 0x1d300, 0x1d356, + 0x1f000, 0x1f02b, + 0x1f030, 0x1f093, + 0x1f0a0, 0x1f0ae, + 0x1f0b1, 0x1f0bf, + 0x1f0c1, 0x1f0cf, + 0x1f0d1, 0x1f0f5, + 0x1f110, 0x1f12e, + 0x1f130, 0x1f16b, + 0x1f170, 0x1f19a, + 0x1f1e6, 0x1f202, + 0x1f210, 0x1f23a, + 0x1f240, 0x1f248, + 0x1f250, 0x1f251, + 0x1f300, 0x1f32c, + 0x1f330, 0x1f37d, + 0x1f380, 0x1f3ce, + 0x1f3d4, 0x1f3f7, + 0x1f400, 0x1f4fe, + 0x1f500, 0x1f54a, + 0x1f550, 0x1f579, + 0x1f57b, 0x1f5a3, + 0x1f5a5, 0x1f642, + 0x1f645, 0x1f6cf, + 0x1f6e0, 0x1f6ec, + 0x1f6f0, 0x1f6f3, + 0x1f700, 0x1f773, + 0x1f780, 0x1f7d4, + 0x1f800, 0x1f80b, + 0x1f810, 0x1f847, + 0x1f850, 0x1f859, + 0x1f860, 0x1f887, + 0x1f890, 0x1f8ad +}; +UCP_FN(So) + +static const unichar ucp_Xan_def[] = { + 0x30, 0x39, + 0x41, 0x5a, + 0x61, 0x7a, + 0xaa, 0xaa, + 0xb2, 0xb3, + 0xb5, 0xb5, + 0xb9, 0xba, + 0xbc, 0xbe, + 0xc0, 0xd6, + 0xd8, 0xf6, + 0xf8, 0x2c1, + 0x2c6, 0x2d1, + 0x2e0, 0x2e4, + 0x2ec, 0x2ec, + 0x2ee, 0x2ee, + 0x370, 0x374, + 0x376, 0x377, + 0x37a, 0x37d, + 0x37f, 0x37f, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x3a1, + 0x3a3, 0x3f5, + 0x3f7, 0x481, + 0x48a, 0x52f, + 0x531, 0x556, + 0x559, 0x559, + 0x561, 0x587, + 0x5d0, 0x5ea, + 0x5f0, 0x5f2, + 0x620, 0x64a, + 0x660, 0x669, + 0x66e, 0x66f, + 0x671, 0x6d3, + 0x6d5, 0x6d5, + 0x6e5, 0x6e6, + 0x6ee, 0x6fc, + 0x6ff, 0x6ff, + 0x710, 0x710, + 0x712, 0x72f, + 0x74d, 0x7a5, + 0x7b1, 0x7b1, + 0x7c0, 0x7ea, + 0x7f4, 0x7f5, + 0x7fa, 0x7fa, + 0x800, 0x815, + 0x81a, 0x81a, + 0x824, 0x824, + 0x828, 0x828, + 0x840, 0x858, + 0x8a0, 0x8b2, + 0x904, 0x939, + 0x93d, 0x93d, + 0x950, 0x950, + 0x958, 0x961, + 0x966, 0x96f, + 0x971, 0x980, + 0x985, 0x98c, + 0x98f, 0x990, + 0x993, 0x9a8, + 0x9aa, 0x9b0, + 0x9b2, 0x9b2, + 0x9b6, 0x9b9, + 0x9bd, 0x9bd, + 0x9ce, 0x9ce, + 0x9dc, 0x9dd, + 0x9df, 0x9e1, + 0x9e6, 0x9f1, + 0x9f4, 0x9f9, + 0xa05, 0xa0a, + 0xa0f, 0xa10, + 0xa13, 0xa28, + 0xa2a, 0xa30, + 0xa32, 0xa33, + 0xa35, 0xa36, + 0xa38, 0xa39, + 0xa59, 0xa5c, + 0xa5e, 0xa5e, + 0xa66, 0xa6f, + 0xa72, 0xa74, + 0xa85, 0xa8d, + 0xa8f, 0xa91, + 0xa93, 0xaa8, + 0xaaa, 0xab0, + 0xab2, 0xab3, + 0xab5, 0xab9, + 0xabd, 0xabd, + 0xad0, 0xad0, + 0xae0, 0xae1, + 0xae6, 0xaef, + 0xb05, 0xb0c, + 0xb0f, 0xb10, + 0xb13, 0xb28, + 0xb2a, 0xb30, + 0xb32, 0xb33, + 0xb35, 0xb39, + 0xb3d, 0xb3d, + 0xb5c, 0xb5d, + 0xb5f, 0xb61, + 0xb66, 0xb6f, + 0xb71, 0xb77, + 0xb83, 0xb83, + 0xb85, 0xb8a, + 0xb8e, 0xb90, + 0xb92, 0xb95, + 0xb99, 0xb9a, + 0xb9c, 0xb9c, + 0xb9e, 0xb9f, + 0xba3, 0xba4, + 0xba8, 0xbaa, + 0xbae, 0xbb9, + 0xbd0, 0xbd0, + 0xbe6, 0xbf2, + 0xc05, 0xc0c, + 0xc0e, 0xc10, + 0xc12, 0xc28, + 0xc2a, 0xc39, + 0xc3d, 0xc3d, + 0xc58, 0xc59, + 0xc60, 0xc61, + 0xc66, 0xc6f, + 0xc78, 0xc7e, + 0xc85, 0xc8c, + 0xc8e, 0xc90, + 0xc92, 0xca8, + 0xcaa, 0xcb3, + 0xcb5, 0xcb9, + 0xcbd, 0xcbd, + 0xcde, 0xcde, + 0xce0, 0xce1, + 0xce6, 0xcef, + 0xcf1, 0xcf2, + 0xd05, 0xd0c, + 0xd0e, 0xd10, + 0xd12, 0xd3a, + 0xd3d, 0xd3d, + 0xd4e, 0xd4e, + 0xd60, 0xd61, + 0xd66, 0xd75, + 0xd7a, 0xd7f, + 0xd85, 0xd96, + 0xd9a, 0xdb1, + 0xdb3, 0xdbb, + 0xdbd, 0xdbd, + 0xdc0, 0xdc6, + 0xde6, 0xdef, + 0xe01, 0xe30, + 0xe32, 0xe33, + 0xe40, 0xe46, + 0xe50, 0xe59, + 0xe81, 0xe82, + 0xe84, 0xe84, + 0xe87, 0xe88, + 0xe8a, 0xe8a, + 0xe8d, 0xe8d, + 0xe94, 0xe97, + 0xe99, 0xe9f, + 0xea1, 0xea3, + 0xea5, 0xea5, + 0xea7, 0xea7, + 0xeaa, 0xeab, + 0xead, 0xeb0, + 0xeb2, 0xeb3, + 0xebd, 0xebd, + 0xec0, 0xec4, + 0xec6, 0xec6, + 0xed0, 0xed9, + 0xedc, 0xedf, + 0xf00, 0xf00, + 0xf20, 0xf33, + 0xf40, 0xf47, + 0xf49, 0xf6c, + 0xf88, 0xf8c, + 0x1000, 0x102a, + 0x103f, 0x1049, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x1090, 0x1099, + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x10d0, 0x10fa, + 0x10fc, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1369, 0x137c, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f8, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191e, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x19d0, 0x19da, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b83, 0x1ba0, + 0x1bae, 0x1be5, + 0x1c00, 0x1c23, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1cf5, 0x1cf6, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x2090, 0x209c, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2150, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2cf2, 0x2cf3, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d, + 0x2d30, 0x2d67, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e2f, 0x2e2f, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31ba, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3248, 0x324f, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa66e, + 0xa67f, 0xa69d, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7f7, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa830, 0xa835, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8d0, 0xa8d9, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9d9, + 0xa9e0, 0xa9e4, + 0xa9e6, 0xa9fe, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa7e, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xaae0, 0xaaea, + 0xaaf2, 0xaaf4, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e, + 0xab30, 0xab5a, + 0xab5c, 0xab5f, + 0xab64, 0xab65, + 0xabc0, 0xabe2, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018b, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x102e1, 0x102fb, + 0x10300, 0x10323, + 0x10330, 0x1034a, + 0x10350, 0x10375, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10500, 0x10527, + 0x10530, 0x10563, + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10858, 0x10876, + 0x10879, 0x1089e, + 0x108a7, 0x108af, + 0x10900, 0x1091b, + 0x10920, 0x10939, + 0x10980, 0x109b7, + 0x109be, 0x109bf, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a40, 0x10a47, + 0x10a60, 0x10a7e, + 0x10a80, 0x10a9f, + 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, + 0x10aeb, 0x10aef, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b91, + 0x10ba9, 0x10baf, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, + 0x11003, 0x11037, + 0x11052, 0x1106f, + 0x11083, 0x110af, + 0x110d0, 0x110e8, + 0x110f0, 0x110f9, + 0x11103, 0x11126, + 0x11136, 0x1113f, + 0x11150, 0x11172, + 0x11176, 0x11176, + 0x11183, 0x111b2, + 0x111c1, 0x111c4, + 0x111d0, 0x111da, + 0x111e1, 0x111f4, + 0x11200, 0x11211, + 0x11213, 0x1122b, + 0x112b0, 0x112de, + 0x112f0, 0x112f9, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1133d, 0x1133d, + 0x1135d, 0x11361, + 0x11480, 0x114af, + 0x114c4, 0x114c5, + 0x114c7, 0x114c7, + 0x114d0, 0x114d9, + 0x11580, 0x115ae, + 0x11600, 0x1162f, + 0x11644, 0x11644, + 0x11650, 0x11659, + 0x11680, 0x116aa, + 0x116c0, 0x116c9, + 0x118a0, 0x118f2, + 0x118ff, 0x118ff, + 0x11ac0, 0x11af8, + 0x12000, 0x12398, + 0x12400, 0x1246e, + 0x13000, 0x1342e, + 0x16800, 0x16a38, + 0x16a40, 0x16a5e, + 0x16a60, 0x16a69, + 0x16ad0, 0x16aed, + 0x16b00, 0x16b2f, + 0x16b40, 0x16b43, + 0x16b50, 0x16b59, + 0x16b5b, 0x16b61, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f, + 0x16f00, 0x16f44, + 0x16f50, 0x16f50, + 0x16f93, 0x16f9f, + 0x1b000, 0x1b001, + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1e800, 0x1e8c4, + 0x1e8c7, 0x1e8cf, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee24, 0x1ee24, + 0x1ee27, 0x1ee27, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee39, 0x1ee39, + 0x1ee3b, 0x1ee3b, + 0x1ee42, 0x1ee42, + 0x1ee47, 0x1ee47, + 0x1ee49, 0x1ee49, + 0x1ee4b, 0x1ee4b, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee54, 0x1ee54, + 0x1ee57, 0x1ee57, + 0x1ee59, 0x1ee59, + 0x1ee5b, 0x1ee5b, + 0x1ee5d, 0x1ee5d, + 0x1ee5f, 0x1ee5f, + 0x1ee61, 0x1ee62, + 0x1ee64, 0x1ee64, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee7e, 0x1ee7e, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x1f100, 0x1f10c, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d +}; +UCP_FN(Xan) + +static const unichar ucp_Xps_def[] = { + 0x9, 0xd, + 0x20, 0x20, + 0xa0, 0xa0, + 0x1680, 0x1680, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; +UCP_FN(Xps) + +static const unichar ucp_Xsp_def[] = { + 0x9, 0xa, + 0xc, 0xd, + 0x20, 0x20, + 0xa0, 0xa0, + 0x1680, 0x1680, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; +UCP_FN(Xsp) + +static const unichar ucp_Xwd_def[] = { + 0x30, 0x39, + 0x41, 0x5a, + 0x5f, 0x5f, + 0x61, 0x7a, + 0xaa, 0xaa, + 0xb2, 0xb3, + 0xb5, 0xb5, + 0xb9, 0xba, + 0xbc, 0xbe, + 0xc0, 0xd6, + 0xd8, 0xf6, + 0xf8, 0x2c1, + 0x2c6, 0x2d1, + 0x2e0, 0x2e4, + 0x2ec, 0x2ec, + 0x2ee, 0x2ee, + 0x370, 0x374, + 0x376, 0x377, + 0x37a, 0x37d, + 0x37f, 0x37f, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x3a1, + 0x3a3, 0x3f5, + 0x3f7, 0x481, + 0x48a, 0x52f, + 0x531, 0x556, + 0x559, 0x559, + 0x561, 0x587, + 0x5d0, 0x5ea, + 0x5f0, 0x5f2, + 0x620, 0x64a, + 0x660, 0x669, + 0x66e, 0x66f, + 0x671, 0x6d3, + 0x6d5, 0x6d5, + 0x6e5, 0x6e6, + 0x6ee, 0x6fc, + 0x6ff, 0x6ff, + 0x710, 0x710, + 0x712, 0x72f, + 0x74d, 0x7a5, + 0x7b1, 0x7b1, + 0x7c0, 0x7ea, + 0x7f4, 0x7f5, + 0x7fa, 0x7fa, + 0x800, 0x815, + 0x81a, 0x81a, + 0x824, 0x824, + 0x828, 0x828, + 0x840, 0x858, + 0x8a0, 0x8b2, + 0x904, 0x939, + 0x93d, 0x93d, + 0x950, 0x950, + 0x958, 0x961, + 0x966, 0x96f, + 0x971, 0x980, + 0x985, 0x98c, + 0x98f, 0x990, + 0x993, 0x9a8, + 0x9aa, 0x9b0, + 0x9b2, 0x9b2, + 0x9b6, 0x9b9, + 0x9bd, 0x9bd, + 0x9ce, 0x9ce, + 0x9dc, 0x9dd, + 0x9df, 0x9e1, + 0x9e6, 0x9f1, + 0x9f4, 0x9f9, + 0xa05, 0xa0a, + 0xa0f, 0xa10, + 0xa13, 0xa28, + 0xa2a, 0xa30, + 0xa32, 0xa33, + 0xa35, 0xa36, + 0xa38, 0xa39, + 0xa59, 0xa5c, + 0xa5e, 0xa5e, + 0xa66, 0xa6f, + 0xa72, 0xa74, + 0xa85, 0xa8d, + 0xa8f, 0xa91, + 0xa93, 0xaa8, + 0xaaa, 0xab0, + 0xab2, 0xab3, + 0xab5, 0xab9, + 0xabd, 0xabd, + 0xad0, 0xad0, + 0xae0, 0xae1, + 0xae6, 0xaef, + 0xb05, 0xb0c, + 0xb0f, 0xb10, + 0xb13, 0xb28, + 0xb2a, 0xb30, + 0xb32, 0xb33, + 0xb35, 0xb39, + 0xb3d, 0xb3d, + 0xb5c, 0xb5d, + 0xb5f, 0xb61, + 0xb66, 0xb6f, + 0xb71, 0xb77, + 0xb83, 0xb83, + 0xb85, 0xb8a, + 0xb8e, 0xb90, + 0xb92, 0xb95, + 0xb99, 0xb9a, + 0xb9c, 0xb9c, + 0xb9e, 0xb9f, + 0xba3, 0xba4, + 0xba8, 0xbaa, + 0xbae, 0xbb9, + 0xbd0, 0xbd0, + 0xbe6, 0xbf2, + 0xc05, 0xc0c, + 0xc0e, 0xc10, + 0xc12, 0xc28, + 0xc2a, 0xc39, + 0xc3d, 0xc3d, + 0xc58, 0xc59, + 0xc60, 0xc61, + 0xc66, 0xc6f, + 0xc78, 0xc7e, + 0xc85, 0xc8c, + 0xc8e, 0xc90, + 0xc92, 0xca8, + 0xcaa, 0xcb3, + 0xcb5, 0xcb9, + 0xcbd, 0xcbd, + 0xcde, 0xcde, + 0xce0, 0xce1, + 0xce6, 0xcef, + 0xcf1, 0xcf2, + 0xd05, 0xd0c, + 0xd0e, 0xd10, + 0xd12, 0xd3a, + 0xd3d, 0xd3d, + 0xd4e, 0xd4e, + 0xd60, 0xd61, + 0xd66, 0xd75, + 0xd7a, 0xd7f, + 0xd85, 0xd96, + 0xd9a, 0xdb1, + 0xdb3, 0xdbb, + 0xdbd, 0xdbd, + 0xdc0, 0xdc6, + 0xde6, 0xdef, + 0xe01, 0xe30, + 0xe32, 0xe33, + 0xe40, 0xe46, + 0xe50, 0xe59, + 0xe81, 0xe82, + 0xe84, 0xe84, + 0xe87, 0xe88, + 0xe8a, 0xe8a, + 0xe8d, 0xe8d, + 0xe94, 0xe97, + 0xe99, 0xe9f, + 0xea1, 0xea3, + 0xea5, 0xea5, + 0xea7, 0xea7, + 0xeaa, 0xeab, + 0xead, 0xeb0, + 0xeb2, 0xeb3, + 0xebd, 0xebd, + 0xec0, 0xec4, + 0xec6, 0xec6, + 0xed0, 0xed9, + 0xedc, 0xedf, + 0xf00, 0xf00, + 0xf20, 0xf33, + 0xf40, 0xf47, + 0xf49, 0xf6c, + 0xf88, 0xf8c, + 0x1000, 0x102a, + 0x103f, 0x1049, + 0x1050, 0x1055, + 0x105a, 0x105d, + 0x1061, 0x1061, + 0x1065, 0x1066, + 0x106e, 0x1070, + 0x1075, 0x1081, + 0x108e, 0x108e, + 0x1090, 0x1099, + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x10d0, 0x10fa, + 0x10fc, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x1369, 0x137c, + 0x1380, 0x138f, + 0x13a0, 0x13f4, + 0x1401, 0x166c, + 0x166f, 0x167f, + 0x1681, 0x169a, + 0x16a0, 0x16ea, + 0x16ee, 0x16f8, + 0x1700, 0x170c, + 0x170e, 0x1711, + 0x1720, 0x1731, + 0x1740, 0x1751, + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1780, 0x17b3, + 0x17d7, 0x17d7, + 0x17dc, 0x17dc, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18a8, + 0x18aa, 0x18aa, + 0x18b0, 0x18f5, + 0x1900, 0x191e, + 0x1946, 0x196d, + 0x1970, 0x1974, + 0x1980, 0x19ab, + 0x19c1, 0x19c7, + 0x19d0, 0x19da, + 0x1a00, 0x1a16, + 0x1a20, 0x1a54, + 0x1a80, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa7, 0x1aa7, + 0x1b05, 0x1b33, + 0x1b45, 0x1b4b, + 0x1b50, 0x1b59, + 0x1b83, 0x1ba0, + 0x1bae, 0x1be5, + 0x1c00, 0x1c23, + 0x1c40, 0x1c49, + 0x1c4d, 0x1c7d, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf1, + 0x1cf5, 0x1cf6, + 0x1d00, 0x1dbf, + 0x1e00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fbc, + 0x1fbe, 0x1fbe, + 0x1fc2, 0x1fc4, + 0x1fc6, 0x1fcc, + 0x1fd0, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fe0, 0x1fec, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffc, + 0x2070, 0x2071, + 0x2074, 0x2079, + 0x207f, 0x2089, + 0x2090, 0x209c, + 0x2102, 0x2102, + 0x2107, 0x2107, + 0x210a, 0x2113, + 0x2115, 0x2115, + 0x2119, 0x211d, + 0x2124, 0x2124, + 0x2126, 0x2126, + 0x2128, 0x2128, + 0x212a, 0x212d, + 0x212f, 0x2139, + 0x213c, 0x213f, + 0x2145, 0x2149, + 0x214e, 0x214e, + 0x2150, 0x2189, + 0x2460, 0x249b, + 0x24ea, 0x24ff, + 0x2776, 0x2793, + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e, + 0x2c60, 0x2ce4, + 0x2ceb, 0x2cee, + 0x2cf2, 0x2cf3, + 0x2cfd, 0x2cfd, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d, + 0x2d30, 0x2d67, + 0x2d6f, 0x2d6f, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0x2e2f, 0x2e2f, + 0x3005, 0x3007, + 0x3021, 0x3029, + 0x3031, 0x3035, + 0x3038, 0x303c, + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x30a1, 0x30fa, + 0x30fc, 0x30ff, + 0x3105, 0x312d, + 0x3131, 0x318e, + 0x3192, 0x3195, + 0x31a0, 0x31ba, + 0x31f0, 0x31ff, + 0x3220, 0x3229, + 0x3248, 0x324f, + 0x3251, 0x325f, + 0x3280, 0x3289, + 0x32b1, 0x32bf, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xa000, 0xa48c, + 0xa4d0, 0xa4fd, + 0xa500, 0xa60c, + 0xa610, 0xa62b, + 0xa640, 0xa66e, + 0xa67f, 0xa69d, + 0xa6a0, 0xa6ef, + 0xa717, 0xa71f, + 0xa722, 0xa788, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7f7, 0xa801, + 0xa803, 0xa805, + 0xa807, 0xa80a, + 0xa80c, 0xa822, + 0xa830, 0xa835, + 0xa840, 0xa873, + 0xa882, 0xa8b3, + 0xa8d0, 0xa8d9, + 0xa8f2, 0xa8f7, + 0xa8fb, 0xa8fb, + 0xa900, 0xa925, + 0xa930, 0xa946, + 0xa960, 0xa97c, + 0xa984, 0xa9b2, + 0xa9cf, 0xa9d9, + 0xa9e0, 0xa9e4, + 0xa9e6, 0xa9fe, + 0xaa00, 0xaa28, + 0xaa40, 0xaa42, + 0xaa44, 0xaa4b, + 0xaa50, 0xaa59, + 0xaa60, 0xaa76, + 0xaa7a, 0xaa7a, + 0xaa7e, 0xaaaf, + 0xaab1, 0xaab1, + 0xaab5, 0xaab6, + 0xaab9, 0xaabd, + 0xaac0, 0xaac0, + 0xaac2, 0xaac2, + 0xaadb, 0xaadd, + 0xaae0, 0xaaea, + 0xaaf2, 0xaaf4, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e, + 0xab30, 0xab5a, + 0xab5c, 0xab5f, + 0xab64, 0xab65, + 0xabc0, 0xabe2, + 0xabf0, 0xabf9, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0xfb00, 0xfb06, + 0xfb13, 0xfb17, + 0xfb1d, 0xfb1d, + 0xfb1f, 0xfb28, + 0xfb2a, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfbb1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfb, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0xff10, 0xff19, + 0xff21, 0xff3a, + 0xff41, 0xff5a, + 0xff66, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc, + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa, + 0x10107, 0x10133, + 0x10140, 0x10178, + 0x1018a, 0x1018b, + 0x10280, 0x1029c, + 0x102a0, 0x102d0, + 0x102e1, 0x102fb, + 0x10300, 0x10323, + 0x10330, 0x1034a, + 0x10350, 0x10375, + 0x10380, 0x1039d, + 0x103a0, 0x103c3, + 0x103c8, 0x103cf, + 0x103d1, 0x103d5, + 0x10400, 0x1049d, + 0x104a0, 0x104a9, + 0x10500, 0x10527, + 0x10530, 0x10563, + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767, + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x10855, + 0x10858, 0x10876, + 0x10879, 0x1089e, + 0x108a7, 0x108af, + 0x10900, 0x1091b, + 0x10920, 0x10939, + 0x10980, 0x109b7, + 0x109be, 0x109bf, + 0x10a00, 0x10a00, + 0x10a10, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a40, 0x10a47, + 0x10a60, 0x10a7e, + 0x10a80, 0x10a9f, + 0x10ac0, 0x10ac7, + 0x10ac9, 0x10ae4, + 0x10aeb, 0x10aef, + 0x10b00, 0x10b35, + 0x10b40, 0x10b55, + 0x10b58, 0x10b72, + 0x10b78, 0x10b91, + 0x10ba9, 0x10baf, + 0x10c00, 0x10c48, + 0x10e60, 0x10e7e, + 0x11003, 0x11037, + 0x11052, 0x1106f, + 0x11083, 0x110af, + 0x110d0, 0x110e8, + 0x110f0, 0x110f9, + 0x11103, 0x11126, + 0x11136, 0x1113f, + 0x11150, 0x11172, + 0x11176, 0x11176, + 0x11183, 0x111b2, + 0x111c1, 0x111c4, + 0x111d0, 0x111da, + 0x111e1, 0x111f4, + 0x11200, 0x11211, + 0x11213, 0x1122b, + 0x112b0, 0x112de, + 0x112f0, 0x112f9, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1133d, 0x1133d, + 0x1135d, 0x11361, + 0x11480, 0x114af, + 0x114c4, 0x114c5, + 0x114c7, 0x114c7, + 0x114d0, 0x114d9, + 0x11580, 0x115ae, + 0x11600, 0x1162f, + 0x11644, 0x11644, + 0x11650, 0x11659, + 0x11680, 0x116aa, + 0x116c0, 0x116c9, + 0x118a0, 0x118f2, + 0x118ff, 0x118ff, + 0x11ac0, 0x11af8, + 0x12000, 0x12398, + 0x12400, 0x1246e, + 0x13000, 0x1342e, + 0x16800, 0x16a38, + 0x16a40, 0x16a5e, + 0x16a60, 0x16a69, + 0x16ad0, 0x16aed, + 0x16b00, 0x16b2f, + 0x16b40, 0x16b43, + 0x16b50, 0x16b59, + 0x16b5b, 0x16b61, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f, + 0x16f00, 0x16f44, + 0x16f50, 0x16f50, + 0x16f93, 0x16f9f, + 0x1b000, 0x1b001, + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1d360, 0x1d371, + 0x1d400, 0x1d454, + 0x1d456, 0x1d49c, + 0x1d49e, 0x1d49f, + 0x1d4a2, 0x1d4a2, + 0x1d4a5, 0x1d4a6, + 0x1d4a9, 0x1d4ac, + 0x1d4ae, 0x1d4b9, + 0x1d4bb, 0x1d4bb, + 0x1d4bd, 0x1d4c3, + 0x1d4c5, 0x1d505, + 0x1d507, 0x1d50a, + 0x1d50d, 0x1d514, + 0x1d516, 0x1d51c, + 0x1d51e, 0x1d539, + 0x1d53b, 0x1d53e, + 0x1d540, 0x1d544, + 0x1d546, 0x1d546, + 0x1d54a, 0x1d550, + 0x1d552, 0x1d6a5, + 0x1d6a8, 0x1d6c0, + 0x1d6c2, 0x1d6da, + 0x1d6dc, 0x1d6fa, + 0x1d6fc, 0x1d714, + 0x1d716, 0x1d734, + 0x1d736, 0x1d74e, + 0x1d750, 0x1d76e, + 0x1d770, 0x1d788, + 0x1d78a, 0x1d7a8, + 0x1d7aa, 0x1d7c2, + 0x1d7c4, 0x1d7cb, + 0x1d7ce, 0x1d7ff, + 0x1e800, 0x1e8c4, + 0x1e8c7, 0x1e8cf, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee24, 0x1ee24, + 0x1ee27, 0x1ee27, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee39, 0x1ee39, + 0x1ee3b, 0x1ee3b, + 0x1ee42, 0x1ee42, + 0x1ee47, 0x1ee47, + 0x1ee49, 0x1ee49, + 0x1ee4b, 0x1ee4b, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee54, 0x1ee54, + 0x1ee57, 0x1ee57, + 0x1ee59, 0x1ee59, + 0x1ee5b, 0x1ee5b, + 0x1ee5d, 0x1ee5d, + 0x1ee5f, 0x1ee5f, + 0x1ee61, 0x1ee62, + 0x1ee64, 0x1ee64, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee7e, 0x1ee7e, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x1f100, 0x1f10c, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d +}; +UCP_FN(Xwd) + +static const unichar ucp_Z_def[] = { + 0x20, 0x20, + 0xa0, 0xa0, + 0x1680, 0x1680, + 0x2000, 0x200a, + 0x2028, 0x2029, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; +UCP_FN(Z) + +static const unichar ucp_Zl_def[] = { + 0x2028, 0x2028 +}; +UCP_FN(Zl) + +static const unichar ucp_Zp_def[] = { + 0x2029, 0x2029 +}; +UCP_FN(Zp) + +static const unichar ucp_Zs_def[] = { + 0x20, 0x20, + 0xa0, 0xa0, + 0x1680, 0x1680, + 0x2000, 0x200a, + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000 +}; +UCP_FN(Zs) + +static const unichar ucp_Arabic_def[] = { + 0x600, 0x604, + 0x606, 0x60b, + 0x60d, 0x61a, + 0x61e, 0x61e, + 0x620, 0x63f, + 0x641, 0x64a, + 0x656, 0x65f, + 0x66a, 0x66f, + 0x671, 0x6dc, + 0x6de, 0x6ff, + 0x750, 0x77f, + 0x8a0, 0x8b2, + 0x8e4, 0x8ff, + 0xfb50, 0xfbc1, + 0xfbd3, 0xfd3d, + 0xfd50, 0xfd8f, + 0xfd92, 0xfdc7, + 0xfdf0, 0xfdfd, + 0xfe70, 0xfe74, + 0xfe76, 0xfefc, + 0x10e60, 0x10e7e, + 0x1ee00, 0x1ee03, + 0x1ee05, 0x1ee1f, + 0x1ee21, 0x1ee22, + 0x1ee24, 0x1ee24, + 0x1ee27, 0x1ee27, + 0x1ee29, 0x1ee32, + 0x1ee34, 0x1ee37, + 0x1ee39, 0x1ee39, + 0x1ee3b, 0x1ee3b, + 0x1ee42, 0x1ee42, + 0x1ee47, 0x1ee47, + 0x1ee49, 0x1ee49, + 0x1ee4b, 0x1ee4b, + 0x1ee4d, 0x1ee4f, + 0x1ee51, 0x1ee52, + 0x1ee54, 0x1ee54, + 0x1ee57, 0x1ee57, + 0x1ee59, 0x1ee59, + 0x1ee5b, 0x1ee5b, + 0x1ee5d, 0x1ee5d, + 0x1ee5f, 0x1ee5f, + 0x1ee61, 0x1ee62, + 0x1ee64, 0x1ee64, + 0x1ee67, 0x1ee6a, + 0x1ee6c, 0x1ee72, + 0x1ee74, 0x1ee77, + 0x1ee79, 0x1ee7c, + 0x1ee7e, 0x1ee7e, + 0x1ee80, 0x1ee89, + 0x1ee8b, 0x1ee9b, + 0x1eea1, 0x1eea3, + 0x1eea5, 0x1eea9, + 0x1eeab, 0x1eebb, + 0x1eef0, 0x1eef1 +}; +UCP_FN(Arabic) + +static const unichar ucp_Armenian_def[] = { + 0x531, 0x556, + 0x559, 0x55f, + 0x561, 0x587, + 0x58a, 0x58a, + 0x58d, 0x58f, + 0xfb13, 0xfb17 +}; +UCP_FN(Armenian) + +static const unichar ucp_Avestan_def[] = { + 0x10b00, 0x10b35, + 0x10b39, 0x10b3f +}; +UCP_FN(Avestan) + +static const unichar ucp_Balinese_def[] = { + 0x1b00, 0x1b4b, + 0x1b50, 0x1b7c +}; +UCP_FN(Balinese) + +static const unichar ucp_Bamum_def[] = { + 0xa6a0, 0xa6f7, + 0x16800, 0x16a38 +}; +UCP_FN(Bamum) + +static const unichar ucp_Bassa_Vah_def[] = { + 0x16ad0, 0x16aed, + 0x16af0, 0x16af5 +}; +UCP_FN(Bassa_Vah) + +static const unichar ucp_Batak_def[] = { + 0x1bc0, 0x1bf3, + 0x1bfc, 0x1bff +}; +UCP_FN(Batak) + +static const unichar ucp_Bengali_def[] = { + 0x980, 0x983, + 0x985, 0x98c, + 0x98f, 0x990, + 0x993, 0x9a8, + 0x9aa, 0x9b0, + 0x9b2, 0x9b2, + 0x9b6, 0x9b9, + 0x9bc, 0x9c4, + 0x9c7, 0x9c8, + 0x9cb, 0x9ce, + 0x9d7, 0x9d7, + 0x9dc, 0x9dd, + 0x9df, 0x9e3, + 0x9e6, 0x9fb +}; +UCP_FN(Bengali) + +static const unichar ucp_Bopomofo_def[] = { + 0x2ea, 0x2eb, + 0x3105, 0x312d, + 0x31a0, 0x31ba +}; +UCP_FN(Bopomofo) + +static const unichar ucp_Brahmi_def[] = { + 0x11000, 0x1104d, + 0x11052, 0x1106f, + 0x1107f, 0x1107f +}; +UCP_FN(Brahmi) + +static const unichar ucp_Braille_def[] = { + 0x2800, 0x28ff +}; +UCP_FN(Braille) + +static const unichar ucp_Buginese_def[] = { + 0x1a00, 0x1a1b, + 0x1a1e, 0x1a1f +}; +UCP_FN(Buginese) + +static const unichar ucp_Buhid_def[] = { + 0x1740, 0x1753 +}; +UCP_FN(Buhid) + +static const unichar ucp_Canadian_Aboriginal_def[] = { + 0x1400, 0x167f, + 0x18b0, 0x18f5 +}; +UCP_FN(Canadian_Aboriginal) + +static const unichar ucp_Carian_def[] = { + 0x102a0, 0x102d0 +}; +UCP_FN(Carian) + +static const unichar ucp_Caucasian_Albanian_def[] = { + 0x10530, 0x10563, + 0x1056f, 0x1056f +}; +UCP_FN(Caucasian_Albanian) + +static const unichar ucp_Chakma_def[] = { + 0x11100, 0x11134, + 0x11136, 0x11143 +}; +UCP_FN(Chakma) + +static const unichar ucp_Cham_def[] = { + 0xaa00, 0xaa36, + 0xaa40, 0xaa4d, + 0xaa50, 0xaa59, + 0xaa5c, 0xaa5f +}; +UCP_FN(Cham) + +static const unichar ucp_Cherokee_def[] = { + 0x13a0, 0x13f4 +}; +UCP_FN(Cherokee) + +static const unichar ucp_Common_def[] = { + 0x0, 0x40, + 0x5b, 0x60, + 0x7b, 0xa9, + 0xab, 0xb9, + 0xbb, 0xbf, + 0xd7, 0xd7, + 0xf7, 0xf7, + 0x2b9, 0x2df, + 0x2e5, 0x2e9, + 0x2ec, 0x2ff, + 0x374, 0x374, + 0x378, 0x379, + 0x37e, 0x37e, + 0x380, 0x383, + 0x385, 0x385, + 0x387, 0x387, + 0x38b, 0x38b, + 0x38d, 0x38d, + 0x3a2, 0x3a2, + 0x530, 0x530, + 0x557, 0x558, + 0x560, 0x560, + 0x588, 0x589, + 0x58b, 0x58c, + 0x590, 0x590, + 0x5c8, 0x5cf, + 0x5eb, 0x5ef, + 0x5f5, 0x5ff, + 0x605, 0x605, + 0x60c, 0x60c, + 0x61b, 0x61d, + 0x61f, 0x61f, + 0x640, 0x640, + 0x660, 0x669, + 0x6dd, 0x6dd, + 0x70e, 0x70e, + 0x74b, 0x74c, + 0x7b2, 0x7bf, + 0x7fb, 0x7ff, + 0x82e, 0x82f, + 0x83f, 0x83f, + 0x85c, 0x85d, + 0x85f, 0x89f, + 0x8b3, 0x8e3, + 0x964, 0x965, + 0x984, 0x984, + 0x98d, 0x98e, + 0x991, 0x992, + 0x9a9, 0x9a9, + 0x9b1, 0x9b1, + 0x9b3, 0x9b5, + 0x9ba, 0x9bb, + 0x9c5, 0x9c6, + 0x9c9, 0x9ca, + 0x9cf, 0x9d6, + 0x9d8, 0x9db, + 0x9de, 0x9de, + 0x9e4, 0x9e5, + 0x9fc, 0xa00, + 0xa04, 0xa04, + 0xa0b, 0xa0e, + 0xa11, 0xa12, + 0xa29, 0xa29, + 0xa31, 0xa31, + 0xa34, 0xa34, + 0xa37, 0xa37, + 0xa3a, 0xa3b, + 0xa3d, 0xa3d, + 0xa43, 0xa46, + 0xa49, 0xa4a, + 0xa4e, 0xa50, + 0xa52, 0xa58, + 0xa5d, 0xa5d, + 0xa5f, 0xa65, + 0xa76, 0xa80, + 0xa84, 0xa84, + 0xa8e, 0xa8e, + 0xa92, 0xa92, + 0xaa9, 0xaa9, + 0xab1, 0xab1, + 0xab4, 0xab4, + 0xaba, 0xabb, + 0xac6, 0xac6, + 0xaca, 0xaca, + 0xace, 0xacf, + 0xad1, 0xadf, + 0xae4, 0xae5, + 0xaf2, 0xb00, + 0xb04, 0xb04, + 0xb0d, 0xb0e, + 0xb11, 0xb12, + 0xb29, 0xb29, + 0xb31, 0xb31, + 0xb34, 0xb34, + 0xb3a, 0xb3b, + 0xb45, 0xb46, + 0xb49, 0xb4a, + 0xb4e, 0xb55, + 0xb58, 0xb5b, + 0xb5e, 0xb5e, + 0xb64, 0xb65, + 0xb78, 0xb81, + 0xb84, 0xb84, + 0xb8b, 0xb8d, + 0xb91, 0xb91, + 0xb96, 0xb98, + 0xb9b, 0xb9b, + 0xb9d, 0xb9d, + 0xba0, 0xba2, + 0xba5, 0xba7, + 0xbab, 0xbad, + 0xbba, 0xbbd, + 0xbc3, 0xbc5, + 0xbc9, 0xbc9, + 0xbce, 0xbcf, + 0xbd1, 0xbd6, + 0xbd8, 0xbe5, + 0xbfb, 0xbff, + 0xc04, 0xc04, + 0xc0d, 0xc0d, + 0xc11, 0xc11, + 0xc29, 0xc29, + 0xc3a, 0xc3c, + 0xc45, 0xc45, + 0xc49, 0xc49, + 0xc4e, 0xc54, + 0xc57, 0xc57, + 0xc5a, 0xc5f, + 0xc64, 0xc65, + 0xc70, 0xc77, + 0xc80, 0xc80, + 0xc84, 0xc84, + 0xc8d, 0xc8d, + 0xc91, 0xc91, + 0xca9, 0xca9, + 0xcb4, 0xcb4, + 0xcba, 0xcbb, + 0xcc5, 0xcc5, + 0xcc9, 0xcc9, + 0xcce, 0xcd4, + 0xcd7, 0xcdd, + 0xcdf, 0xcdf, + 0xce4, 0xce5, + 0xcf0, 0xcf0, + 0xcf3, 0xd00, + 0xd04, 0xd04, + 0xd0d, 0xd0d, + 0xd11, 0xd11, + 0xd3b, 0xd3c, + 0xd45, 0xd45, + 0xd49, 0xd49, + 0xd4f, 0xd56, + 0xd58, 0xd5f, + 0xd64, 0xd65, + 0xd76, 0xd78, + 0xd80, 0xd81, + 0xd84, 0xd84, + 0xd97, 0xd99, + 0xdb2, 0xdb2, + 0xdbc, 0xdbc, + 0xdbe, 0xdbf, + 0xdc7, 0xdc9, + 0xdcb, 0xdce, + 0xdd5, 0xdd5, + 0xdd7, 0xdd7, + 0xde0, 0xde5, + 0xdf0, 0xdf1, + 0xdf5, 0xe00, + 0xe3b, 0xe3f, + 0xe5c, 0xe80, + 0xe83, 0xe83, + 0xe85, 0xe86, + 0xe89, 0xe89, + 0xe8b, 0xe8c, + 0xe8e, 0xe93, + 0xe98, 0xe98, + 0xea0, 0xea0, + 0xea4, 0xea4, + 0xea6, 0xea6, + 0xea8, 0xea9, + 0xeac, 0xeac, + 0xeba, 0xeba, + 0xebe, 0xebf, + 0xec5, 0xec5, + 0xec7, 0xec7, + 0xece, 0xecf, + 0xeda, 0xedb, + 0xee0, 0xeff, + 0xf48, 0xf48, + 0xf6d, 0xf70, + 0xf98, 0xf98, + 0xfbd, 0xfbd, + 0xfcd, 0xfcd, + 0xfd5, 0xfd8, + 0xfdb, 0xfff, + 0x10c6, 0x10c6, + 0x10c8, 0x10cc, + 0x10ce, 0x10cf, + 0x10fb, 0x10fb, + 0x1249, 0x1249, + 0x124e, 0x124f, + 0x1257, 0x1257, + 0x1259, 0x1259, + 0x125e, 0x125f, + 0x1289, 0x1289, + 0x128e, 0x128f, + 0x12b1, 0x12b1, + 0x12b6, 0x12b7, + 0x12bf, 0x12bf, + 0x12c1, 0x12c1, + 0x12c6, 0x12c7, + 0x12d7, 0x12d7, + 0x1311, 0x1311, + 0x1316, 0x1317, + 0x135b, 0x135c, + 0x137d, 0x137f, + 0x139a, 0x139f, + 0x13f5, 0x13ff, + 0x169d, 0x169f, + 0x16eb, 0x16ed, + 0x16f9, 0x16ff, + 0x170d, 0x170d, + 0x1715, 0x171f, + 0x1735, 0x173f, + 0x1754, 0x175f, + 0x176d, 0x176d, + 0x1771, 0x1771, + 0x1774, 0x177f, + 0x17de, 0x17df, + 0x17ea, 0x17ef, + 0x17fa, 0x17ff, + 0x1802, 0x1803, + 0x1805, 0x1805, + 0x180f, 0x180f, + 0x181a, 0x181f, + 0x1878, 0x187f, + 0x18ab, 0x18af, + 0x18f6, 0x18ff, + 0x191f, 0x191f, + 0x192c, 0x192f, + 0x193c, 0x193f, + 0x1941, 0x1943, + 0x196e, 0x196f, + 0x1975, 0x197f, + 0x19ac, 0x19af, + 0x19ca, 0x19cf, + 0x19db, 0x19dd, + 0x1a1c, 0x1a1d, + 0x1a5f, 0x1a5f, + 0x1a7d, 0x1a7e, + 0x1a8a, 0x1a8f, + 0x1a9a, 0x1a9f, + 0x1aae, 0x1aaf, + 0x1abf, 0x1aff, + 0x1b4c, 0x1b4f, + 0x1b7d, 0x1b7f, + 0x1bf4, 0x1bfb, + 0x1c38, 0x1c3a, + 0x1c4a, 0x1c4c, + 0x1c80, 0x1cbf, + 0x1cc8, 0x1ccf, + 0x1cd3, 0x1cd3, + 0x1ce1, 0x1ce1, + 0x1ce9, 0x1cec, + 0x1cee, 0x1cf3, + 0x1cf5, 0x1cf7, + 0x1cfa, 0x1cff, + 0x1df6, 0x1dfb, + 0x1f16, 0x1f17, + 0x1f1e, 0x1f1f, + 0x1f46, 0x1f47, + 0x1f4e, 0x1f4f, + 0x1f58, 0x1f58, + 0x1f5a, 0x1f5a, + 0x1f5c, 0x1f5c, + 0x1f5e, 0x1f5e, + 0x1f7e, 0x1f7f, + 0x1fb5, 0x1fb5, + 0x1fc5, 0x1fc5, + 0x1fd4, 0x1fd5, + 0x1fdc, 0x1fdc, + 0x1ff0, 0x1ff1, + 0x1ff5, 0x1ff5, + 0x1fff, 0x200b, + 0x200e, 0x2070, + 0x2072, 0x207e, + 0x2080, 0x208f, + 0x209d, 0x20cf, + 0x20f1, 0x2125, + 0x2127, 0x2129, + 0x212c, 0x2131, + 0x2133, 0x214d, + 0x214f, 0x215f, + 0x2189, 0x27ff, + 0x2900, 0x2bff, + 0x2c2f, 0x2c2f, + 0x2c5f, 0x2c5f, + 0x2cf4, 0x2cf8, + 0x2d26, 0x2d26, + 0x2d28, 0x2d2c, + 0x2d2e, 0x2d2f, + 0x2d68, 0x2d6e, + 0x2d71, 0x2d7e, + 0x2d97, 0x2d9f, + 0x2da7, 0x2da7, + 0x2daf, 0x2daf, + 0x2db7, 0x2db7, + 0x2dbf, 0x2dbf, + 0x2dc7, 0x2dc7, + 0x2dcf, 0x2dcf, + 0x2dd7, 0x2dd7, + 0x2ddf, 0x2ddf, + 0x2e00, 0x2e7f, + 0x2e9a, 0x2e9a, + 0x2ef4, 0x2eff, + 0x2fd6, 0x3004, + 0x3006, 0x3006, + 0x3008, 0x3020, + 0x3030, 0x3037, + 0x303c, 0x3040, + 0x3097, 0x3098, + 0x309b, 0x309c, + 0x30a0, 0x30a0, + 0x30fb, 0x30fc, + 0x3100, 0x3104, + 0x312e, 0x3130, + 0x318f, 0x319f, + 0x31bb, 0x31ef, + 0x321f, 0x325f, + 0x327f, 0x32cf, + 0x32ff, 0x32ff, + 0x3358, 0x33ff, + 0x4db6, 0x4dff, + 0x9fcd, 0x9fff, + 0xa48d, 0xa48f, + 0xa4c7, 0xa4cf, + 0xa62c, 0xa63f, + 0xa69e, 0xa69e, + 0xa6f8, 0xa721, + 0xa788, 0xa78a, + 0xa78f, 0xa78f, + 0xa7ae, 0xa7af, + 0xa7b2, 0xa7f6, + 0xa82c, 0xa83f, + 0xa878, 0xa87f, + 0xa8c5, 0xa8cd, + 0xa8da, 0xa8df, + 0xa8fc, 0xa8ff, + 0xa92e, 0xa92e, + 0xa954, 0xa95e, + 0xa97d, 0xa97f, + 0xa9ce, 0xa9cf, + 0xa9da, 0xa9dd, + 0xa9ff, 0xa9ff, + 0xaa37, 0xaa3f, + 0xaa4e, 0xaa4f, + 0xaa5a, 0xaa5b, + 0xaac3, 0xaada, + 0xaaf7, 0xab00, + 0xab07, 0xab08, + 0xab0f, 0xab10, + 0xab17, 0xab1f, + 0xab27, 0xab27, + 0xab2f, 0xab2f, + 0xab5b, 0xab5b, + 0xab60, 0xab63, + 0xab66, 0xabbf, + 0xabee, 0xabef, + 0xabfa, 0xabff, + 0xd7a4, 0xd7af, + 0xd7c7, 0xd7ca, + 0xd7fc, 0xf8ff, + 0xfa6e, 0xfa6f, + 0xfada, 0xfaff, + 0xfb07, 0xfb12, + 0xfb18, 0xfb1c, + 0xfb37, 0xfb37, + 0xfb3d, 0xfb3d, + 0xfb3f, 0xfb3f, + 0xfb42, 0xfb42, + 0xfb45, 0xfb45, + 0xfbc2, 0xfbd2, + 0xfd3e, 0xfd4f, + 0xfd90, 0xfd91, + 0xfdc8, 0xfdef, + 0xfdfe, 0xfdff, + 0xfe10, 0xfe1f, + 0xfe2e, 0xfe6f, + 0xfe75, 0xfe75, + 0xfefd, 0xff20, + 0xff3b, 0xff40, + 0xff5b, 0xff65, + 0xff70, 0xff70, + 0xff9e, 0xff9f, + 0xffbf, 0xffc1, + 0xffc8, 0xffc9, + 0xffd0, 0xffd1, + 0xffd8, 0xffd9, + 0xffdd, 0xffff, + 0x1000c, 0x1000c, + 0x10027, 0x10027, + 0x1003b, 0x1003b, + 0x1003e, 0x1003e, + 0x1004e, 0x1004f, + 0x1005e, 0x1007f, + 0x100fb, 0x1013f, + 0x1018d, 0x1019f, + 0x101a1, 0x101fc, + 0x101fe, 0x1027f, + 0x1029d, 0x1029f, + 0x102d1, 0x102df, + 0x102e1, 0x102ff, + 0x10324, 0x1032f, + 0x1034b, 0x1034f, + 0x1037b, 0x1037f, + 0x1039e, 0x1039e, + 0x103c4, 0x103c7, + 0x103d6, 0x103ff, + 0x1049e, 0x1049f, + 0x104aa, 0x104ff, + 0x10528, 0x1052f, + 0x10564, 0x1056e, + 0x10570, 0x105ff, + 0x10737, 0x1073f, + 0x10756, 0x1075f, + 0x10768, 0x107ff, + 0x10806, 0x10807, + 0x10809, 0x10809, + 0x10836, 0x10836, + 0x10839, 0x1083b, + 0x1083d, 0x1083e, + 0x10856, 0x10856, + 0x1089f, 0x108a6, + 0x108b0, 0x108ff, + 0x1091c, 0x1091e, + 0x1093a, 0x1093e, + 0x10940, 0x1097f, + 0x109b8, 0x109bd, + 0x109c0, 0x109ff, + 0x10a04, 0x10a04, + 0x10a07, 0x10a0b, + 0x10a14, 0x10a14, + 0x10a18, 0x10a18, + 0x10a34, 0x10a37, + 0x10a3b, 0x10a3e, + 0x10a48, 0x10a4f, + 0x10a59, 0x10a5f, + 0x10aa0, 0x10abf, + 0x10ae7, 0x10aea, + 0x10af7, 0x10aff, + 0x10b36, 0x10b38, + 0x10b56, 0x10b57, + 0x10b73, 0x10b77, + 0x10b92, 0x10b98, + 0x10b9d, 0x10ba8, + 0x10bb0, 0x10bff, + 0x10c49, 0x10e5f, + 0x10e7f, 0x10fff, + 0x1104e, 0x11051, + 0x11070, 0x1107e, + 0x110c2, 0x110cf, + 0x110e9, 0x110ef, + 0x110fa, 0x110ff, + 0x11135, 0x11135, + 0x11144, 0x1114f, + 0x11177, 0x1117f, + 0x111c9, 0x111cc, + 0x111ce, 0x111cf, + 0x111db, 0x111e0, + 0x111f5, 0x111ff, + 0x11212, 0x11212, + 0x1123e, 0x112af, + 0x112eb, 0x112ef, + 0x112fa, 0x11300, + 0x11304, 0x11304, + 0x1130d, 0x1130e, + 0x11311, 0x11312, + 0x11329, 0x11329, + 0x11331, 0x11331, + 0x11334, 0x11334, + 0x1133a, 0x1133b, + 0x11345, 0x11346, + 0x11349, 0x1134a, + 0x1134e, 0x11356, + 0x11358, 0x1135c, + 0x11364, 0x11365, + 0x1136d, 0x1136f, + 0x11375, 0x1147f, + 0x114c8, 0x114cf, + 0x114da, 0x1157f, + 0x115b6, 0x115b7, + 0x115ca, 0x115ff, + 0x11645, 0x1164f, + 0x1165a, 0x1167f, + 0x116b8, 0x116bf, + 0x116ca, 0x1189f, + 0x118f3, 0x118fe, + 0x11900, 0x11abf, + 0x11af9, 0x11fff, + 0x12399, 0x123ff, + 0x1246f, 0x1246f, + 0x12475, 0x12fff, + 0x1342f, 0x167ff, + 0x16a39, 0x16a3f, + 0x16a5f, 0x16a5f, + 0x16a6a, 0x16a6d, + 0x16a70, 0x16acf, + 0x16aee, 0x16aef, + 0x16af6, 0x16aff, + 0x16b46, 0x16b4f, + 0x16b5a, 0x16b5a, + 0x16b62, 0x16b62, + 0x16b78, 0x16b7c, + 0x16b90, 0x16eff, + 0x16f45, 0x16f4f, + 0x16f7f, 0x16f8e, + 0x16fa0, 0x1afff, + 0x1b002, 0x1bbff, + 0x1bc6b, 0x1bc6f, + 0x1bc7d, 0x1bc7f, + 0x1bc89, 0x1bc8f, + 0x1bc9a, 0x1bc9b, + 0x1bca0, 0x1d166, + 0x1d16a, 0x1d17a, + 0x1d183, 0x1d184, + 0x1d18c, 0x1d1a9, + 0x1d1ae, 0x1d1ff, + 0x1d246, 0x1e7ff, + 0x1e8c5, 0x1e8c6, + 0x1e8d7, 0x1edff, + 0x1ee04, 0x1ee04, + 0x1ee20, 0x1ee20, + 0x1ee23, 0x1ee23, + 0x1ee25, 0x1ee26, + 0x1ee28, 0x1ee28, + 0x1ee33, 0x1ee33, + 0x1ee38, 0x1ee38, + 0x1ee3a, 0x1ee3a, + 0x1ee3c, 0x1ee41, + 0x1ee43, 0x1ee46, + 0x1ee48, 0x1ee48, + 0x1ee4a, 0x1ee4a, + 0x1ee4c, 0x1ee4c, + 0x1ee50, 0x1ee50, + 0x1ee53, 0x1ee53, + 0x1ee55, 0x1ee56, + 0x1ee58, 0x1ee58, + 0x1ee5a, 0x1ee5a, + 0x1ee5c, 0x1ee5c, + 0x1ee5e, 0x1ee5e, + 0x1ee60, 0x1ee60, + 0x1ee63, 0x1ee63, + 0x1ee65, 0x1ee66, + 0x1ee6b, 0x1ee6b, + 0x1ee73, 0x1ee73, + 0x1ee78, 0x1ee78, + 0x1ee7d, 0x1ee7d, + 0x1ee7f, 0x1ee7f, + 0x1ee8a, 0x1ee8a, + 0x1ee9c, 0x1eea0, + 0x1eea4, 0x1eea4, + 0x1eeaa, 0x1eeaa, + 0x1eebc, 0x1eeef, + 0x1eef2, 0x1f1ff, + 0x1f201, 0x1ffff, + 0x2a6d7, 0x2a6ff, + 0x2b735, 0x2b73f, + 0x2b81e, 0x2f7ff, + 0x2fa1e, 0xe00ff, + 0xe01f0, 0x10ffff +}; +UCP_FN(Common) + +static const unichar ucp_Coptic_def[] = { + 0x3e2, 0x3ef, + 0x2c80, 0x2cf3, + 0x2cf9, 0x2cff +}; +UCP_FN(Coptic) + +static const unichar ucp_Cuneiform_def[] = { + 0x12000, 0x12398, + 0x12400, 0x1246e, + 0x12470, 0x12474 +}; +UCP_FN(Cuneiform) + +static const unichar ucp_Cypriot_def[] = { + 0x10800, 0x10805, + 0x10808, 0x10808, + 0x1080a, 0x10835, + 0x10837, 0x10838, + 0x1083c, 0x1083c, + 0x1083f, 0x1083f +}; +UCP_FN(Cypriot) + +static const unichar ucp_Cyrillic_def[] = { + 0x400, 0x484, + 0x487, 0x52f, + 0x1d2b, 0x1d2b, + 0x1d78, 0x1d78, + 0x2de0, 0x2dff, + 0xa640, 0xa69d, + 0xa69f, 0xa69f +}; +UCP_FN(Cyrillic) + +static const unichar ucp_Deseret_def[] = { + 0x10400, 0x1044f +}; +UCP_FN(Deseret) + +static const unichar ucp_Devanagari_def[] = { + 0x900, 0x950, + 0x953, 0x963, + 0x966, 0x97f, + 0xa8e0, 0xa8fb +}; +UCP_FN(Devanagari) + +static const unichar ucp_Duployan_def[] = { + 0x1bc00, 0x1bc6a, + 0x1bc70, 0x1bc7c, + 0x1bc80, 0x1bc88, + 0x1bc90, 0x1bc99, + 0x1bc9c, 0x1bc9f +}; +UCP_FN(Duployan) + +static const unichar ucp_Egyptian_Hieroglyphs_def[] = { + 0x13000, 0x1342e +}; +UCP_FN(Egyptian_Hieroglyphs) + +static const unichar ucp_Elbasan_def[] = { + 0x10500, 0x10527 +}; +UCP_FN(Elbasan) + +static const unichar ucp_Ethiopic_def[] = { + 0x1200, 0x1248, + 0x124a, 0x124d, + 0x1250, 0x1256, + 0x1258, 0x1258, + 0x125a, 0x125d, + 0x1260, 0x1288, + 0x128a, 0x128d, + 0x1290, 0x12b0, + 0x12b2, 0x12b5, + 0x12b8, 0x12be, + 0x12c0, 0x12c0, + 0x12c2, 0x12c5, + 0x12c8, 0x12d6, + 0x12d8, 0x1310, + 0x1312, 0x1315, + 0x1318, 0x135a, + 0x135d, 0x137c, + 0x1380, 0x1399, + 0x2d80, 0x2d96, + 0x2da0, 0x2da6, + 0x2da8, 0x2dae, + 0x2db0, 0x2db6, + 0x2db8, 0x2dbe, + 0x2dc0, 0x2dc6, + 0x2dc8, 0x2dce, + 0x2dd0, 0x2dd6, + 0x2dd8, 0x2dde, + 0xab01, 0xab06, + 0xab09, 0xab0e, + 0xab11, 0xab16, + 0xab20, 0xab26, + 0xab28, 0xab2e +}; +UCP_FN(Ethiopic) + +static const unichar ucp_Georgian_def[] = { + 0x10a0, 0x10c5, + 0x10c7, 0x10c7, + 0x10cd, 0x10cd, + 0x10d0, 0x10fa, + 0x10fc, 0x10ff, + 0x2d00, 0x2d25, + 0x2d27, 0x2d27, + 0x2d2d, 0x2d2d +}; +UCP_FN(Georgian) + +static const unichar ucp_Glagolitic_def[] = { + 0x2c00, 0x2c2e, + 0x2c30, 0x2c5e +}; +UCP_FN(Glagolitic) + +static const unichar ucp_Gothic_def[] = { + 0x10330, 0x1034a +}; +UCP_FN(Gothic) + +static const unichar ucp_Grantha_def[] = { + 0x11301, 0x11303, + 0x11305, 0x1130c, + 0x1130f, 0x11310, + 0x11313, 0x11328, + 0x1132a, 0x11330, + 0x11332, 0x11333, + 0x11335, 0x11339, + 0x1133c, 0x11344, + 0x11347, 0x11348, + 0x1134b, 0x1134d, + 0x11357, 0x11357, + 0x1135d, 0x11363, + 0x11366, 0x1136c, + 0x11370, 0x11374 +}; +UCP_FN(Grantha) + +static const unichar ucp_Greek_def[] = { + 0x370, 0x373, + 0x375, 0x377, + 0x37a, 0x37d, + 0x37f, 0x37f, + 0x384, 0x384, + 0x386, 0x386, + 0x388, 0x38a, + 0x38c, 0x38c, + 0x38e, 0x3a1, + 0x3a3, 0x3e1, + 0x3f0, 0x3ff, + 0x1d26, 0x1d2a, + 0x1d5d, 0x1d61, + 0x1d66, 0x1d6a, + 0x1dbf, 0x1dbf, + 0x1f00, 0x1f15, + 0x1f18, 0x1f1d, + 0x1f20, 0x1f45, + 0x1f48, 0x1f4d, + 0x1f50, 0x1f57, + 0x1f59, 0x1f59, + 0x1f5b, 0x1f5b, + 0x1f5d, 0x1f5d, + 0x1f5f, 0x1f7d, + 0x1f80, 0x1fb4, + 0x1fb6, 0x1fc4, + 0x1fc6, 0x1fd3, + 0x1fd6, 0x1fdb, + 0x1fdd, 0x1fef, + 0x1ff2, 0x1ff4, + 0x1ff6, 0x1ffe, + 0x2126, 0x2126, + 0xab65, 0xab65, + 0x10140, 0x1018c, + 0x101a0, 0x101a0, + 0x1d200, 0x1d245 +}; +UCP_FN(Greek) + +static const unichar ucp_Gujarati_def[] = { + 0xa81, 0xa83, + 0xa85, 0xa8d, + 0xa8f, 0xa91, + 0xa93, 0xaa8, + 0xaaa, 0xab0, + 0xab2, 0xab3, + 0xab5, 0xab9, + 0xabc, 0xac5, + 0xac7, 0xac9, + 0xacb, 0xacd, + 0xad0, 0xad0, + 0xae0, 0xae3, + 0xae6, 0xaf1 +}; +UCP_FN(Gujarati) + +static const unichar ucp_Gurmukhi_def[] = { + 0xa01, 0xa03, + 0xa05, 0xa0a, + 0xa0f, 0xa10, + 0xa13, 0xa28, + 0xa2a, 0xa30, + 0xa32, 0xa33, + 0xa35, 0xa36, + 0xa38, 0xa39, + 0xa3c, 0xa3c, + 0xa3e, 0xa42, + 0xa47, 0xa48, + 0xa4b, 0xa4d, + 0xa51, 0xa51, + 0xa59, 0xa5c, + 0xa5e, 0xa5e, + 0xa66, 0xa75 +}; +UCP_FN(Gurmukhi) + +static const unichar ucp_Han_def[] = { + 0x2e80, 0x2e99, + 0x2e9b, 0x2ef3, + 0x2f00, 0x2fd5, + 0x3005, 0x3005, + 0x3007, 0x3007, + 0x3021, 0x3029, + 0x3038, 0x303b, + 0x3400, 0x4db5, + 0x4e00, 0x9fcc, + 0xf900, 0xfa6d, + 0xfa70, 0xfad9, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, + 0x2b740, 0x2b81d, + 0x2f800, 0x2fa1d +}; +UCP_FN(Han) + +static const unichar ucp_Hangul_def[] = { + 0x1100, 0x11ff, + 0x302e, 0x302f, + 0x3131, 0x318e, + 0x3200, 0x321e, + 0x3260, 0x327e, + 0xa960, 0xa97c, + 0xac00, 0xd7a3, + 0xd7b0, 0xd7c6, + 0xd7cb, 0xd7fb, + 0xffa0, 0xffbe, + 0xffc2, 0xffc7, + 0xffca, 0xffcf, + 0xffd2, 0xffd7, + 0xffda, 0xffdc +}; +UCP_FN(Hangul) + +static const unichar ucp_Hanunoo_def[] = { + 0x1720, 0x1734 +}; +UCP_FN(Hanunoo) + +static const unichar ucp_Hebrew_def[] = { + 0x591, 0x5c7, + 0x5d0, 0x5ea, + 0x5f0, 0x5f4, + 0xfb1d, 0xfb36, + 0xfb38, 0xfb3c, + 0xfb3e, 0xfb3e, + 0xfb40, 0xfb41, + 0xfb43, 0xfb44, + 0xfb46, 0xfb4f +}; +UCP_FN(Hebrew) + +static const unichar ucp_Hiragana_def[] = { + 0x3041, 0x3096, + 0x309d, 0x309f, + 0x1b001, 0x1b001, + 0x1f200, 0x1f200 +}; +UCP_FN(Hiragana) + +static const unichar ucp_Imperial_Aramaic_def[] = { + 0x10840, 0x10855, + 0x10857, 0x1085f +}; +UCP_FN(Imperial_Aramaic) + +static const unichar ucp_Inherited_def[] = { + 0x300, 0x36f, + 0x485, 0x486, + 0x64b, 0x655, + 0x670, 0x670, + 0x951, 0x952, + 0x1ab0, 0x1abe, + 0x1cd0, 0x1cd2, + 0x1cd4, 0x1ce0, + 0x1ce2, 0x1ce8, + 0x1ced, 0x1ced, + 0x1cf4, 0x1cf4, + 0x1cf8, 0x1cf9, + 0x1dc0, 0x1df5, + 0x1dfc, 0x1dff, + 0x200c, 0x200d, + 0x20d0, 0x20f0, + 0x302a, 0x302d, + 0x3099, 0x309a, + 0xfe00, 0xfe0f, + 0xfe20, 0xfe2d, + 0x101fd, 0x101fd, + 0x102e0, 0x102e0, + 0x1d167, 0x1d169, + 0x1d17b, 0x1d182, + 0x1d185, 0x1d18b, + 0x1d1aa, 0x1d1ad, + 0xe0100, 0xe01ef +}; +UCP_FN(Inherited) + +static const unichar ucp_Inscriptional_Pahlavi_def[] = { + 0x10b60, 0x10b72, + 0x10b78, 0x10b7f +}; +UCP_FN(Inscriptional_Pahlavi) + +static const unichar ucp_Inscriptional_Parthian_def[] = { + 0x10b40, 0x10b55, + 0x10b58, 0x10b5f +}; +UCP_FN(Inscriptional_Parthian) + +static const unichar ucp_Javanese_def[] = { + 0xa980, 0xa9cd, + 0xa9d0, 0xa9d9, + 0xa9de, 0xa9df +}; +UCP_FN(Javanese) + +static const unichar ucp_Kaithi_def[] = { + 0x11080, 0x110c1 +}; +UCP_FN(Kaithi) + +static const unichar ucp_Kannada_def[] = { + 0xc81, 0xc83, + 0xc85, 0xc8c, + 0xc8e, 0xc90, + 0xc92, 0xca8, + 0xcaa, 0xcb3, + 0xcb5, 0xcb9, + 0xcbc, 0xcc4, + 0xcc6, 0xcc8, + 0xcca, 0xccd, + 0xcd5, 0xcd6, + 0xcde, 0xcde, + 0xce0, 0xce3, + 0xce6, 0xcef, + 0xcf1, 0xcf2 +}; +UCP_FN(Kannada) + +static const unichar ucp_Katakana_def[] = { + 0x30a1, 0x30fa, + 0x30fd, 0x30ff, + 0x31f0, 0x31ff, + 0x32d0, 0x32fe, + 0x3300, 0x3357, + 0xff66, 0xff6f, + 0xff71, 0xff9d, + 0x1b000, 0x1b000 +}; +UCP_FN(Katakana) + +static const unichar ucp_Kayah_Li_def[] = { + 0xa900, 0xa92d, + 0xa92f, 0xa92f +}; +UCP_FN(Kayah_Li) + +static const unichar ucp_Kharoshthi_def[] = { + 0x10a00, 0x10a03, + 0x10a05, 0x10a06, + 0x10a0c, 0x10a13, + 0x10a15, 0x10a17, + 0x10a19, 0x10a33, + 0x10a38, 0x10a3a, + 0x10a3f, 0x10a47, + 0x10a50, 0x10a58 +}; +UCP_FN(Kharoshthi) + +static const unichar ucp_Khmer_def[] = { + 0x1780, 0x17dd, + 0x17e0, 0x17e9, + 0x17f0, 0x17f9, + 0x19e0, 0x19ff +}; +UCP_FN(Khmer) + +static const unichar ucp_Khojki_def[] = { + 0x11200, 0x11211, + 0x11213, 0x1123d +}; +UCP_FN(Khojki) + +static const unichar ucp_Khudawadi_def[] = { + 0x112b0, 0x112ea, + 0x112f0, 0x112f9 +}; +UCP_FN(Khudawadi) + +static const unichar ucp_Lao_def[] = { + 0xe81, 0xe82, + 0xe84, 0xe84, + 0xe87, 0xe88, + 0xe8a, 0xe8a, + 0xe8d, 0xe8d, + 0xe94, 0xe97, + 0xe99, 0xe9f, + 0xea1, 0xea3, + 0xea5, 0xea5, + 0xea7, 0xea7, + 0xeaa, 0xeab, + 0xead, 0xeb9, + 0xebb, 0xebd, + 0xec0, 0xec4, + 0xec6, 0xec6, + 0xec8, 0xecd, + 0xed0, 0xed9, + 0xedc, 0xedf +}; +UCP_FN(Lao) + +static const unichar ucp_Latin_def[] = { + 0x41, 0x5a, + 0x61, 0x7a, + 0xaa, 0xaa, + 0xba, 0xba, + 0xc0, 0xd6, + 0xd8, 0xf6, + 0xf8, 0x2b8, + 0x2e0, 0x2e4, + 0x1d00, 0x1d25, + 0x1d2c, 0x1d5c, + 0x1d62, 0x1d65, + 0x1d6b, 0x1d77, + 0x1d79, 0x1dbe, + 0x1e00, 0x1eff, + 0x2071, 0x2071, + 0x207f, 0x207f, + 0x2090, 0x209c, + 0x212a, 0x212b, + 0x2132, 0x2132, + 0x214e, 0x214e, + 0x2160, 0x2188, + 0x2c60, 0x2c7f, + 0xa722, 0xa787, + 0xa78b, 0xa78e, + 0xa790, 0xa7ad, + 0xa7b0, 0xa7b1, + 0xa7f7, 0xa7ff, + 0xab30, 0xab5a, + 0xab5c, 0xab5f, + 0xab64, 0xab64, + 0xfb00, 0xfb06, + 0xff21, 0xff3a, + 0xff41, 0xff5a +}; +UCP_FN(Latin) + +static const unichar ucp_Lepcha_def[] = { + 0x1c00, 0x1c37, + 0x1c3b, 0x1c49, + 0x1c4d, 0x1c4f +}; +UCP_FN(Lepcha) + +static const unichar ucp_Limbu_def[] = { + 0x1900, 0x191e, + 0x1920, 0x192b, + 0x1930, 0x193b, + 0x1940, 0x1940, + 0x1944, 0x194f +}; +UCP_FN(Limbu) + +static const unichar ucp_Linear_A_def[] = { + 0x10600, 0x10736, + 0x10740, 0x10755, + 0x10760, 0x10767 +}; +UCP_FN(Linear_A) + +static const unichar ucp_Linear_B_def[] = { + 0x10000, 0x1000b, + 0x1000d, 0x10026, + 0x10028, 0x1003a, + 0x1003c, 0x1003d, + 0x1003f, 0x1004d, + 0x10050, 0x1005d, + 0x10080, 0x100fa +}; +UCP_FN(Linear_B) + +static const unichar ucp_Lisu_def[] = { + 0xa4d0, 0xa4ff +}; +UCP_FN(Lisu) + +static const unichar ucp_Lycian_def[] = { + 0x10280, 0x1029c +}; +UCP_FN(Lycian) + +static const unichar ucp_Lydian_def[] = { + 0x10920, 0x10939, + 0x1093f, 0x1093f +}; +UCP_FN(Lydian) + +static const unichar ucp_Mahajani_def[] = { + 0x11150, 0x11176 +}; +UCP_FN(Mahajani) + +static const unichar ucp_Malayalam_def[] = { + 0xd01, 0xd03, + 0xd05, 0xd0c, + 0xd0e, 0xd10, + 0xd12, 0xd3a, + 0xd3d, 0xd44, + 0xd46, 0xd48, + 0xd4a, 0xd4e, + 0xd57, 0xd57, + 0xd60, 0xd63, + 0xd66, 0xd75, + 0xd79, 0xd7f +}; +UCP_FN(Malayalam) + +static const unichar ucp_Mandaic_def[] = { + 0x840, 0x85b, + 0x85e, 0x85e +}; +UCP_FN(Mandaic) + +static const unichar ucp_Manichaean_def[] = { + 0x10ac0, 0x10ae6, + 0x10aeb, 0x10af6 +}; +UCP_FN(Manichaean) + +static const unichar ucp_Meetei_Mayek_def[] = { + 0xaae0, 0xaaf6, + 0xabc0, 0xabed, + 0xabf0, 0xabf9 +}; +UCP_FN(Meetei_Mayek) + +static const unichar ucp_Mende_Kikakui_def[] = { + 0x1e800, 0x1e8c4, + 0x1e8c7, 0x1e8d6 +}; +UCP_FN(Mende_Kikakui) + +static const unichar ucp_Meroitic_Cursive_def[] = { + 0x109a0, 0x109b7, + 0x109be, 0x109bf +}; +UCP_FN(Meroitic_Cursive) + +static const unichar ucp_Meroitic_Hieroglyphs_def[] = { + 0x10980, 0x1099f +}; +UCP_FN(Meroitic_Hieroglyphs) + +static const unichar ucp_Miao_def[] = { + 0x16f00, 0x16f44, + 0x16f50, 0x16f7e, + 0x16f8f, 0x16f9f +}; +UCP_FN(Miao) + +static const unichar ucp_Modi_def[] = { + 0x11600, 0x11644, + 0x11650, 0x11659 +}; +UCP_FN(Modi) + +static const unichar ucp_Mongolian_def[] = { + 0x1800, 0x1801, + 0x1804, 0x1804, + 0x1806, 0x180e, + 0x1810, 0x1819, + 0x1820, 0x1877, + 0x1880, 0x18aa +}; +UCP_FN(Mongolian) + +static const unichar ucp_Mro_def[] = { + 0x16a40, 0x16a5e, + 0x16a60, 0x16a69, + 0x16a6e, 0x16a6f +}; +UCP_FN(Mro) + +static const unichar ucp_Myanmar_def[] = { + 0x1000, 0x109f, + 0xa9e0, 0xa9fe, + 0xaa60, 0xaa7f +}; +UCP_FN(Myanmar) + +static const unichar ucp_Nabataean_def[] = { + 0x10880, 0x1089e, + 0x108a7, 0x108af +}; +UCP_FN(Nabataean) + +static const unichar ucp_New_Tai_Lue_def[] = { + 0x1980, 0x19ab, + 0x19b0, 0x19c9, + 0x19d0, 0x19da, + 0x19de, 0x19df +}; +UCP_FN(New_Tai_Lue) + +static const unichar ucp_Nko_def[] = { + 0x7c0, 0x7fa +}; +UCP_FN(Nko) + +static const unichar ucp_Ogham_def[] = { + 0x1680, 0x169c +}; +UCP_FN(Ogham) + +static const unichar ucp_Ol_Chiki_def[] = { + 0x1c50, 0x1c7f +}; +UCP_FN(Ol_Chiki) + +static const unichar ucp_Old_Italic_def[] = { + 0x10300, 0x10323 +}; +UCP_FN(Old_Italic) + +static const unichar ucp_Old_North_Arabian_def[] = { + 0x10a80, 0x10a9f +}; +UCP_FN(Old_North_Arabian) + +static const unichar ucp_Old_Permic_def[] = { + 0x10350, 0x1037a +}; +UCP_FN(Old_Permic) + +static const unichar ucp_Old_Persian_def[] = { + 0x103a0, 0x103c3, + 0x103c8, 0x103d5 +}; +UCP_FN(Old_Persian) + +static const unichar ucp_Old_South_Arabian_def[] = { + 0x10a60, 0x10a7f +}; +UCP_FN(Old_South_Arabian) + +static const unichar ucp_Old_Turkic_def[] = { + 0x10c00, 0x10c48 +}; +UCP_FN(Old_Turkic) + +static const unichar ucp_Oriya_def[] = { + 0xb01, 0xb03, + 0xb05, 0xb0c, + 0xb0f, 0xb10, + 0xb13, 0xb28, + 0xb2a, 0xb30, + 0xb32, 0xb33, + 0xb35, 0xb39, + 0xb3c, 0xb44, + 0xb47, 0xb48, + 0xb4b, 0xb4d, + 0xb56, 0xb57, + 0xb5c, 0xb5d, + 0xb5f, 0xb63, + 0xb66, 0xb77 +}; +UCP_FN(Oriya) + +static const unichar ucp_Osmanya_def[] = { + 0x10480, 0x1049d, + 0x104a0, 0x104a9 +}; +UCP_FN(Osmanya) + +static const unichar ucp_Pahawh_Hmong_def[] = { + 0x16b00, 0x16b45, + 0x16b50, 0x16b59, + 0x16b5b, 0x16b61, + 0x16b63, 0x16b77, + 0x16b7d, 0x16b8f +}; +UCP_FN(Pahawh_Hmong) + +static const unichar ucp_Palmyrene_def[] = { + 0x10860, 0x1087f +}; +UCP_FN(Palmyrene) + +static const unichar ucp_Pau_Cin_Hau_def[] = { + 0x11ac0, 0x11af8 +}; +UCP_FN(Pau_Cin_Hau) + +static const unichar ucp_Phags_Pa_def[] = { + 0xa840, 0xa877 +}; +UCP_FN(Phags_Pa) + +static const unichar ucp_Phoenician_def[] = { + 0x10900, 0x1091b, + 0x1091f, 0x1091f +}; +UCP_FN(Phoenician) + +static const unichar ucp_Psalter_Pahlavi_def[] = { + 0x10b80, 0x10b91, + 0x10b99, 0x10b9c, + 0x10ba9, 0x10baf +}; +UCP_FN(Psalter_Pahlavi) + +static const unichar ucp_Rejang_def[] = { + 0xa930, 0xa953, + 0xa95f, 0xa95f +}; +UCP_FN(Rejang) + +static const unichar ucp_Runic_def[] = { + 0x16a0, 0x16ea, + 0x16ee, 0x16f8 +}; +UCP_FN(Runic) + +static const unichar ucp_Samaritan_def[] = { + 0x800, 0x82d, + 0x830, 0x83e +}; +UCP_FN(Samaritan) + +static const unichar ucp_Saurashtra_def[] = { + 0xa880, 0xa8c4, + 0xa8ce, 0xa8d9 +}; +UCP_FN(Saurashtra) + +static const unichar ucp_Sharada_def[] = { + 0x11180, 0x111c8, + 0x111cd, 0x111cd, + 0x111d0, 0x111da +}; +UCP_FN(Sharada) + +static const unichar ucp_Shavian_def[] = { + 0x10450, 0x1047f +}; +UCP_FN(Shavian) + +static const unichar ucp_Siddham_def[] = { + 0x11580, 0x115b5, + 0x115b8, 0x115c9 +}; +UCP_FN(Siddham) + +static const unichar ucp_Sinhala_def[] = { + 0xd82, 0xd83, + 0xd85, 0xd96, + 0xd9a, 0xdb1, + 0xdb3, 0xdbb, + 0xdbd, 0xdbd, + 0xdc0, 0xdc6, + 0xdca, 0xdca, + 0xdcf, 0xdd4, + 0xdd6, 0xdd6, + 0xdd8, 0xddf, + 0xde6, 0xdef, + 0xdf2, 0xdf4, + 0x111e1, 0x111f4 +}; +UCP_FN(Sinhala) + +static const unichar ucp_Sora_Sompeng_def[] = { + 0x110d0, 0x110e8, + 0x110f0, 0x110f9 +}; +UCP_FN(Sora_Sompeng) + +static const unichar ucp_Sundanese_def[] = { + 0x1b80, 0x1bbf, + 0x1cc0, 0x1cc7 +}; +UCP_FN(Sundanese) + +static const unichar ucp_Syloti_Nagri_def[] = { + 0xa800, 0xa82b +}; +UCP_FN(Syloti_Nagri) + +static const unichar ucp_Syriac_def[] = { + 0x700, 0x70d, + 0x70f, 0x74a, + 0x74d, 0x74f +}; +UCP_FN(Syriac) + +static const unichar ucp_Tagalog_def[] = { + 0x1700, 0x170c, + 0x170e, 0x1714 +}; +UCP_FN(Tagalog) + +static const unichar ucp_Tagbanwa_def[] = { + 0x1760, 0x176c, + 0x176e, 0x1770, + 0x1772, 0x1773 +}; +UCP_FN(Tagbanwa) + +static const unichar ucp_Tai_Le_def[] = { + 0x1950, 0x196d, + 0x1970, 0x1974 +}; +UCP_FN(Tai_Le) + +static const unichar ucp_Tai_Tham_def[] = { + 0x1a20, 0x1a5e, + 0x1a60, 0x1a7c, + 0x1a7f, 0x1a89, + 0x1a90, 0x1a99, + 0x1aa0, 0x1aad +}; +UCP_FN(Tai_Tham) + +static const unichar ucp_Tai_Viet_def[] = { + 0xaa80, 0xaac2, + 0xaadb, 0xaadf +}; +UCP_FN(Tai_Viet) + +static const unichar ucp_Takri_def[] = { + 0x11680, 0x116b7, + 0x116c0, 0x116c9 +}; +UCP_FN(Takri) + +static const unichar ucp_Tamil_def[] = { + 0xb82, 0xb83, + 0xb85, 0xb8a, + 0xb8e, 0xb90, + 0xb92, 0xb95, + 0xb99, 0xb9a, + 0xb9c, 0xb9c, + 0xb9e, 0xb9f, + 0xba3, 0xba4, + 0xba8, 0xbaa, + 0xbae, 0xbb9, + 0xbbe, 0xbc2, + 0xbc6, 0xbc8, + 0xbca, 0xbcd, + 0xbd0, 0xbd0, + 0xbd7, 0xbd7, + 0xbe6, 0xbfa +}; +UCP_FN(Tamil) + +static const unichar ucp_Telugu_def[] = { + 0xc00, 0xc03, + 0xc05, 0xc0c, + 0xc0e, 0xc10, + 0xc12, 0xc28, + 0xc2a, 0xc39, + 0xc3d, 0xc44, + 0xc46, 0xc48, + 0xc4a, 0xc4d, + 0xc55, 0xc56, + 0xc58, 0xc59, + 0xc60, 0xc63, + 0xc66, 0xc6f, + 0xc78, 0xc7f +}; +UCP_FN(Telugu) + +static const unichar ucp_Thaana_def[] = { + 0x780, 0x7b1 +}; +UCP_FN(Thaana) + +static const unichar ucp_Thai_def[] = { + 0xe01, 0xe3a, + 0xe40, 0xe5b +}; +UCP_FN(Thai) + +static const unichar ucp_Tibetan_def[] = { + 0xf00, 0xf47, + 0xf49, 0xf6c, + 0xf71, 0xf97, + 0xf99, 0xfbc, + 0xfbe, 0xfcc, + 0xfce, 0xfd4, + 0xfd9, 0xfda +}; +UCP_FN(Tibetan) + +static const unichar ucp_Tifinagh_def[] = { + 0x2d30, 0x2d67, + 0x2d6f, 0x2d70, + 0x2d7f, 0x2d7f +}; +UCP_FN(Tifinagh) + +static const unichar ucp_Tirhuta_def[] = { + 0x11480, 0x114c7, + 0x114d0, 0x114d9 +}; +UCP_FN(Tirhuta) + +static const unichar ucp_Ugaritic_def[] = { + 0x10380, 0x1039d, + 0x1039f, 0x1039f +}; +UCP_FN(Ugaritic) + +static const unichar ucp_Vai_def[] = { + 0xa500, 0xa62b +}; +UCP_FN(Vai) + +static const unichar ucp_Warang_Citi_def[] = { + 0x118a0, 0x118f2, + 0x118ff, 0x118ff +}; +UCP_FN(Warang_Citi) + +static const unichar ucp_Yi_def[] = { + 0xa000, 0xa48c, + 0xa490, 0xa4c6 +}; +UCP_FN(Yi) + +static const unicase ucp_caseless_def[] = { + {0x41, 0x61}, + {0x42, 0x62}, + {0x43, 0x63}, + {0x44, 0x64}, + {0x45, 0x65}, + {0x46, 0x66}, + {0x47, 0x67}, + {0x48, 0x68}, + {0x49, 0x69}, + {0x4a, 0x6a}, + {0x4b, 0x6b}, + {0x4b, 0x212a}, + {0x4c, 0x6c}, + {0x4d, 0x6d}, + {0x4e, 0x6e}, + {0x4f, 0x6f}, + {0x50, 0x70}, + {0x51, 0x71}, + {0x52, 0x72}, + {0x53, 0x73}, + {0x53, 0x17f}, + {0x54, 0x74}, + {0x55, 0x75}, + {0x56, 0x76}, + {0x57, 0x77}, + {0x58, 0x78}, + {0x59, 0x79}, + {0x5a, 0x7a}, + {0x61, 0x41}, + {0x62, 0x42}, + {0x63, 0x43}, + {0x64, 0x44}, + {0x65, 0x45}, + {0x66, 0x46}, + {0x67, 0x47}, + {0x68, 0x48}, + {0x69, 0x49}, + {0x6a, 0x4a}, + {0x6b, 0x4b}, + {0x6b, 0x212a}, + {0x6c, 0x4c}, + {0x6d, 0x4d}, + {0x6e, 0x4e}, + {0x6f, 0x4f}, + {0x70, 0x50}, + {0x71, 0x51}, + {0x72, 0x52}, + {0x73, 0x53}, + {0x73, 0x17f}, + {0x74, 0x54}, + {0x75, 0x55}, + {0x76, 0x56}, + {0x77, 0x57}, + {0x78, 0x58}, + {0x79, 0x59}, + {0x7a, 0x5a}, + {0xb5, 0x39c}, + {0xb5, 0x3bc}, + {0xc0, 0xe0}, + {0xc1, 0xe1}, + {0xc2, 0xe2}, + {0xc3, 0xe3}, + {0xc4, 0xe4}, + {0xc5, 0xe5}, + {0xc5, 0x212b}, + {0xc6, 0xe6}, + {0xc7, 0xe7}, + {0xc8, 0xe8}, + {0xc9, 0xe9}, + {0xca, 0xea}, + {0xcb, 0xeb}, + {0xcc, 0xec}, + {0xcd, 0xed}, + {0xce, 0xee}, + {0xcf, 0xef}, + {0xd0, 0xf0}, + {0xd1, 0xf1}, + {0xd2, 0xf2}, + {0xd3, 0xf3}, + {0xd4, 0xf4}, + {0xd5, 0xf5}, + {0xd6, 0xf6}, + {0xd8, 0xf8}, + {0xd9, 0xf9}, + {0xda, 0xfa}, + {0xdb, 0xfb}, + {0xdc, 0xfc}, + {0xdd, 0xfd}, + {0xde, 0xfe}, + {0xdf, 0x1e9e}, + {0xe0, 0xc0}, + {0xe1, 0xc1}, + {0xe2, 0xc2}, + {0xe3, 0xc3}, + {0xe4, 0xc4}, + {0xe5, 0xc5}, + {0xe5, 0x212b}, + {0xe6, 0xc6}, + {0xe7, 0xc7}, + {0xe8, 0xc8}, + {0xe9, 0xc9}, + {0xea, 0xca}, + {0xeb, 0xcb}, + {0xec, 0xcc}, + {0xed, 0xcd}, + {0xee, 0xce}, + {0xef, 0xcf}, + {0xf0, 0xd0}, + {0xf1, 0xd1}, + {0xf2, 0xd2}, + {0xf3, 0xd3}, + {0xf4, 0xd4}, + {0xf5, 0xd5}, + {0xf6, 0xd6}, + {0xf8, 0xd8}, + {0xf9, 0xd9}, + {0xfa, 0xda}, + {0xfb, 0xdb}, + {0xfc, 0xdc}, + {0xfd, 0xdd}, + {0xfe, 0xde}, + {0xff, 0x178}, + {0x100, 0x101}, + {0x101, 0x100}, + {0x102, 0x103}, + {0x103, 0x102}, + {0x104, 0x105}, + {0x105, 0x104}, + {0x106, 0x107}, + {0x107, 0x106}, + {0x108, 0x109}, + {0x109, 0x108}, + {0x10a, 0x10b}, + {0x10b, 0x10a}, + {0x10c, 0x10d}, + {0x10d, 0x10c}, + {0x10e, 0x10f}, + {0x10f, 0x10e}, + {0x110, 0x111}, + {0x111, 0x110}, + {0x112, 0x113}, + {0x113, 0x112}, + {0x114, 0x115}, + {0x115, 0x114}, + {0x116, 0x117}, + {0x117, 0x116}, + {0x118, 0x119}, + {0x119, 0x118}, + {0x11a, 0x11b}, + {0x11b, 0x11a}, + {0x11c, 0x11d}, + {0x11d, 0x11c}, + {0x11e, 0x11f}, + {0x11f, 0x11e}, + {0x120, 0x121}, + {0x121, 0x120}, + {0x122, 0x123}, + {0x123, 0x122}, + {0x124, 0x125}, + {0x125, 0x124}, + {0x126, 0x127}, + {0x127, 0x126}, + {0x128, 0x129}, + {0x129, 0x128}, + {0x12a, 0x12b}, + {0x12b, 0x12a}, + {0x12c, 0x12d}, + {0x12d, 0x12c}, + {0x12e, 0x12f}, + {0x12f, 0x12e}, + {0x132, 0x133}, + {0x133, 0x132}, + {0x134, 0x135}, + {0x135, 0x134}, + {0x136, 0x137}, + {0x137, 0x136}, + {0x139, 0x13a}, + {0x13a, 0x139}, + {0x13b, 0x13c}, + {0x13c, 0x13b}, + {0x13d, 0x13e}, + {0x13e, 0x13d}, + {0x13f, 0x140}, + {0x140, 0x13f}, + {0x141, 0x142}, + {0x142, 0x141}, + {0x143, 0x144}, + {0x144, 0x143}, + {0x145, 0x146}, + {0x146, 0x145}, + {0x147, 0x148}, + {0x148, 0x147}, + {0x14a, 0x14b}, + {0x14b, 0x14a}, + {0x14c, 0x14d}, + {0x14d, 0x14c}, + {0x14e, 0x14f}, + {0x14f, 0x14e}, + {0x150, 0x151}, + {0x151, 0x150}, + {0x152, 0x153}, + {0x153, 0x152}, + {0x154, 0x155}, + {0x155, 0x154}, + {0x156, 0x157}, + {0x157, 0x156}, + {0x158, 0x159}, + {0x159, 0x158}, + {0x15a, 0x15b}, + {0x15b, 0x15a}, + {0x15c, 0x15d}, + {0x15d, 0x15c}, + {0x15e, 0x15f}, + {0x15f, 0x15e}, + {0x160, 0x161}, + {0x161, 0x160}, + {0x162, 0x163}, + {0x163, 0x162}, + {0x164, 0x165}, + {0x165, 0x164}, + {0x166, 0x167}, + {0x167, 0x166}, + {0x168, 0x169}, + {0x169, 0x168}, + {0x16a, 0x16b}, + {0x16b, 0x16a}, + {0x16c, 0x16d}, + {0x16d, 0x16c}, + {0x16e, 0x16f}, + {0x16f, 0x16e}, + {0x170, 0x171}, + {0x171, 0x170}, + {0x172, 0x173}, + {0x173, 0x172}, + {0x174, 0x175}, + {0x175, 0x174}, + {0x176, 0x177}, + {0x177, 0x176}, + {0x178, 0xff}, + {0x179, 0x17a}, + {0x17a, 0x179}, + {0x17b, 0x17c}, + {0x17c, 0x17b}, + {0x17d, 0x17e}, + {0x17e, 0x17d}, + {0x17f, 0x53}, + {0x17f, 0x73}, + {0x180, 0x243}, + {0x181, 0x253}, + {0x182, 0x183}, + {0x183, 0x182}, + {0x184, 0x185}, + {0x185, 0x184}, + {0x186, 0x254}, + {0x187, 0x188}, + {0x188, 0x187}, + {0x189, 0x256}, + {0x18a, 0x257}, + {0x18b, 0x18c}, + {0x18c, 0x18b}, + {0x18e, 0x1dd}, + {0x18f, 0x259}, + {0x190, 0x25b}, + {0x191, 0x192}, + {0x192, 0x191}, + {0x193, 0x260}, + {0x194, 0x263}, + {0x195, 0x1f6}, + {0x196, 0x269}, + {0x197, 0x268}, + {0x198, 0x199}, + {0x199, 0x198}, + {0x19a, 0x23d}, + {0x19c, 0x26f}, + {0x19d, 0x272}, + {0x19e, 0x220}, + {0x19f, 0x275}, + {0x1a0, 0x1a1}, + {0x1a1, 0x1a0}, + {0x1a2, 0x1a3}, + {0x1a3, 0x1a2}, + {0x1a4, 0x1a5}, + {0x1a5, 0x1a4}, + {0x1a6, 0x280}, + {0x1a7, 0x1a8}, + {0x1a8, 0x1a7}, + {0x1a9, 0x283}, + {0x1ac, 0x1ad}, + {0x1ad, 0x1ac}, + {0x1ae, 0x288}, + {0x1af, 0x1b0}, + {0x1b0, 0x1af}, + {0x1b1, 0x28a}, + {0x1b2, 0x28b}, + {0x1b3, 0x1b4}, + {0x1b4, 0x1b3}, + {0x1b5, 0x1b6}, + {0x1b6, 0x1b5}, + {0x1b7, 0x292}, + {0x1b8, 0x1b9}, + {0x1b9, 0x1b8}, + {0x1bc, 0x1bd}, + {0x1bd, 0x1bc}, + {0x1bf, 0x1f7}, + {0x1c4, 0x1c5}, + {0x1c4, 0x1c6}, + {0x1c5, 0x1c4}, + {0x1c5, 0x1c6}, + {0x1c6, 0x1c4}, + {0x1c6, 0x1c5}, + {0x1c7, 0x1c8}, + {0x1c7, 0x1c9}, + {0x1c8, 0x1c7}, + {0x1c8, 0x1c9}, + {0x1c9, 0x1c7}, + {0x1c9, 0x1c8}, + {0x1ca, 0x1cb}, + {0x1ca, 0x1cc}, + {0x1cb, 0x1ca}, + {0x1cb, 0x1cc}, + {0x1cc, 0x1ca}, + {0x1cc, 0x1cb}, + {0x1cd, 0x1ce}, + {0x1ce, 0x1cd}, + {0x1cf, 0x1d0}, + {0x1d0, 0x1cf}, + {0x1d1, 0x1d2}, + {0x1d2, 0x1d1}, + {0x1d3, 0x1d4}, + {0x1d4, 0x1d3}, + {0x1d5, 0x1d6}, + {0x1d6, 0x1d5}, + {0x1d7, 0x1d8}, + {0x1d8, 0x1d7}, + {0x1d9, 0x1da}, + {0x1da, 0x1d9}, + {0x1db, 0x1dc}, + {0x1dc, 0x1db}, + {0x1dd, 0x18e}, + {0x1de, 0x1df}, + {0x1df, 0x1de}, + {0x1e0, 0x1e1}, + {0x1e1, 0x1e0}, + {0x1e2, 0x1e3}, + {0x1e3, 0x1e2}, + {0x1e4, 0x1e5}, + {0x1e5, 0x1e4}, + {0x1e6, 0x1e7}, + {0x1e7, 0x1e6}, + {0x1e8, 0x1e9}, + {0x1e9, 0x1e8}, + {0x1ea, 0x1eb}, + {0x1eb, 0x1ea}, + {0x1ec, 0x1ed}, + {0x1ed, 0x1ec}, + {0x1ee, 0x1ef}, + {0x1ef, 0x1ee}, + {0x1f1, 0x1f2}, + {0x1f1, 0x1f3}, + {0x1f2, 0x1f1}, + {0x1f2, 0x1f3}, + {0x1f3, 0x1f1}, + {0x1f3, 0x1f2}, + {0x1f4, 0x1f5}, + {0x1f5, 0x1f4}, + {0x1f6, 0x195}, + {0x1f7, 0x1bf}, + {0x1f8, 0x1f9}, + {0x1f9, 0x1f8}, + {0x1fa, 0x1fb}, + {0x1fb, 0x1fa}, + {0x1fc, 0x1fd}, + {0x1fd, 0x1fc}, + {0x1fe, 0x1ff}, + {0x1ff, 0x1fe}, + {0x200, 0x201}, + {0x201, 0x200}, + {0x202, 0x203}, + {0x203, 0x202}, + {0x204, 0x205}, + {0x205, 0x204}, + {0x206, 0x207}, + {0x207, 0x206}, + {0x208, 0x209}, + {0x209, 0x208}, + {0x20a, 0x20b}, + {0x20b, 0x20a}, + {0x20c, 0x20d}, + {0x20d, 0x20c}, + {0x20e, 0x20f}, + {0x20f, 0x20e}, + {0x210, 0x211}, + {0x211, 0x210}, + {0x212, 0x213}, + {0x213, 0x212}, + {0x214, 0x215}, + {0x215, 0x214}, + {0x216, 0x217}, + {0x217, 0x216}, + {0x218, 0x219}, + {0x219, 0x218}, + {0x21a, 0x21b}, + {0x21b, 0x21a}, + {0x21c, 0x21d}, + {0x21d, 0x21c}, + {0x21e, 0x21f}, + {0x21f, 0x21e}, + {0x220, 0x19e}, + {0x222, 0x223}, + {0x223, 0x222}, + {0x224, 0x225}, + {0x225, 0x224}, + {0x226, 0x227}, + {0x227, 0x226}, + {0x228, 0x229}, + {0x229, 0x228}, + {0x22a, 0x22b}, + {0x22b, 0x22a}, + {0x22c, 0x22d}, + {0x22d, 0x22c}, + {0x22e, 0x22f}, + {0x22f, 0x22e}, + {0x230, 0x231}, + {0x231, 0x230}, + {0x232, 0x233}, + {0x233, 0x232}, + {0x23a, 0x2c65}, + {0x23b, 0x23c}, + {0x23c, 0x23b}, + {0x23d, 0x19a}, + {0x23e, 0x2c66}, + {0x23f, 0x2c7e}, + {0x240, 0x2c7f}, + {0x241, 0x242}, + {0x242, 0x241}, + {0x243, 0x180}, + {0x244, 0x289}, + {0x245, 0x28c}, + {0x246, 0x247}, + {0x247, 0x246}, + {0x248, 0x249}, + {0x249, 0x248}, + {0x24a, 0x24b}, + {0x24b, 0x24a}, + {0x24c, 0x24d}, + {0x24d, 0x24c}, + {0x24e, 0x24f}, + {0x24f, 0x24e}, + {0x250, 0x2c6f}, + {0x251, 0x2c6d}, + {0x252, 0x2c70}, + {0x253, 0x181}, + {0x254, 0x186}, + {0x256, 0x189}, + {0x257, 0x18a}, + {0x259, 0x18f}, + {0x25b, 0x190}, + {0x25c, 0xa7ab}, + {0x260, 0x193}, + {0x261, 0xa7ac}, + {0x263, 0x194}, + {0x265, 0xa78d}, + {0x266, 0xa7aa}, + {0x268, 0x197}, + {0x269, 0x196}, + {0x26b, 0x2c62}, + {0x26c, 0xa7ad}, + {0x26f, 0x19c}, + {0x271, 0x2c6e}, + {0x272, 0x19d}, + {0x275, 0x19f}, + {0x27d, 0x2c64}, + {0x280, 0x1a6}, + {0x283, 0x1a9}, + {0x287, 0xa7b1}, + {0x288, 0x1ae}, + {0x289, 0x244}, + {0x28a, 0x1b1}, + {0x28b, 0x1b2}, + {0x28c, 0x245}, + {0x292, 0x1b7}, + {0x29e, 0xa7b0}, + {0x345, 0x399}, + {0x345, 0x3b9}, + {0x345, 0x1fbe}, + {0x370, 0x371}, + {0x371, 0x370}, + {0x372, 0x373}, + {0x373, 0x372}, + {0x376, 0x377}, + {0x377, 0x376}, + {0x37b, 0x3fd}, + {0x37c, 0x3fe}, + {0x37d, 0x3ff}, + {0x37f, 0x3f3}, + {0x386, 0x3ac}, + {0x388, 0x3ad}, + {0x389, 0x3ae}, + {0x38a, 0x3af}, + {0x38c, 0x3cc}, + {0x38e, 0x3cd}, + {0x38f, 0x3ce}, + {0x391, 0x3b1}, + {0x392, 0x3b2}, + {0x392, 0x3d0}, + {0x393, 0x3b3}, + {0x394, 0x3b4}, + {0x395, 0x3b5}, + {0x395, 0x3f5}, + {0x396, 0x3b6}, + {0x397, 0x3b7}, + {0x398, 0x3b8}, + {0x398, 0x3d1}, + {0x398, 0x3f4}, + {0x399, 0x345}, + {0x399, 0x3b9}, + {0x399, 0x1fbe}, + {0x39a, 0x3ba}, + {0x39a, 0x3f0}, + {0x39b, 0x3bb}, + {0x39c, 0xb5}, + {0x39c, 0x3bc}, + {0x39d, 0x3bd}, + {0x39e, 0x3be}, + {0x39f, 0x3bf}, + {0x3a0, 0x3c0}, + {0x3a0, 0x3d6}, + {0x3a1, 0x3c1}, + {0x3a1, 0x3f1}, + {0x3a3, 0x3c2}, + {0x3a3, 0x3c3}, + {0x3a4, 0x3c4}, + {0x3a5, 0x3c5}, + {0x3a6, 0x3c6}, + {0x3a6, 0x3d5}, + {0x3a7, 0x3c7}, + {0x3a8, 0x3c8}, + {0x3a9, 0x3c9}, + {0x3a9, 0x2126}, + {0x3aa, 0x3ca}, + {0x3ab, 0x3cb}, + {0x3ac, 0x386}, + {0x3ad, 0x388}, + {0x3ae, 0x389}, + {0x3af, 0x38a}, + {0x3b1, 0x391}, + {0x3b2, 0x392}, + {0x3b2, 0x3d0}, + {0x3b3, 0x393}, + {0x3b4, 0x394}, + {0x3b5, 0x395}, + {0x3b5, 0x3f5}, + {0x3b6, 0x396}, + {0x3b7, 0x397}, + {0x3b8, 0x398}, + {0x3b8, 0x3d1}, + {0x3b8, 0x3f4}, + {0x3b9, 0x345}, + {0x3b9, 0x399}, + {0x3b9, 0x1fbe}, + {0x3ba, 0x39a}, + {0x3ba, 0x3f0}, + {0x3bb, 0x39b}, + {0x3bc, 0xb5}, + {0x3bc, 0x39c}, + {0x3bd, 0x39d}, + {0x3be, 0x39e}, + {0x3bf, 0x39f}, + {0x3c0, 0x3a0}, + {0x3c0, 0x3d6}, + {0x3c1, 0x3a1}, + {0x3c1, 0x3f1}, + {0x3c2, 0x3a3}, + {0x3c2, 0x3c3}, + {0x3c3, 0x3a3}, + {0x3c3, 0x3c2}, + {0x3c4, 0x3a4}, + {0x3c5, 0x3a5}, + {0x3c6, 0x3a6}, + {0x3c6, 0x3d5}, + {0x3c7, 0x3a7}, + {0x3c8, 0x3a8}, + {0x3c9, 0x3a9}, + {0x3c9, 0x2126}, + {0x3ca, 0x3aa}, + {0x3cb, 0x3ab}, + {0x3cc, 0x38c}, + {0x3cd, 0x38e}, + {0x3ce, 0x38f}, + {0x3cf, 0x3d7}, + {0x3d0, 0x392}, + {0x3d0, 0x3b2}, + {0x3d1, 0x398}, + {0x3d1, 0x3b8}, + {0x3d1, 0x3f4}, + {0x3d5, 0x3a6}, + {0x3d5, 0x3c6}, + {0x3d6, 0x3a0}, + {0x3d6, 0x3c0}, + {0x3d7, 0x3cf}, + {0x3d8, 0x3d9}, + {0x3d9, 0x3d8}, + {0x3da, 0x3db}, + {0x3db, 0x3da}, + {0x3dc, 0x3dd}, + {0x3dd, 0x3dc}, + {0x3de, 0x3df}, + {0x3df, 0x3de}, + {0x3e0, 0x3e1}, + {0x3e1, 0x3e0}, + {0x3e2, 0x3e3}, + {0x3e3, 0x3e2}, + {0x3e4, 0x3e5}, + {0x3e5, 0x3e4}, + {0x3e6, 0x3e7}, + {0x3e7, 0x3e6}, + {0x3e8, 0x3e9}, + {0x3e9, 0x3e8}, + {0x3ea, 0x3eb}, + {0x3eb, 0x3ea}, + {0x3ec, 0x3ed}, + {0x3ed, 0x3ec}, + {0x3ee, 0x3ef}, + {0x3ef, 0x3ee}, + {0x3f0, 0x39a}, + {0x3f0, 0x3ba}, + {0x3f1, 0x3a1}, + {0x3f1, 0x3c1}, + {0x3f2, 0x3f9}, + {0x3f3, 0x37f}, + {0x3f4, 0x398}, + {0x3f4, 0x3b8}, + {0x3f4, 0x3d1}, + {0x3f5, 0x395}, + {0x3f5, 0x3b5}, + {0x3f7, 0x3f8}, + {0x3f8, 0x3f7}, + {0x3f9, 0x3f2}, + {0x3fa, 0x3fb}, + {0x3fb, 0x3fa}, + {0x3fd, 0x37b}, + {0x3fe, 0x37c}, + {0x3ff, 0x37d}, + {0x400, 0x450}, + {0x401, 0x451}, + {0x402, 0x452}, + {0x403, 0x453}, + {0x404, 0x454}, + {0x405, 0x455}, + {0x406, 0x456}, + {0x407, 0x457}, + {0x408, 0x458}, + {0x409, 0x459}, + {0x40a, 0x45a}, + {0x40b, 0x45b}, + {0x40c, 0x45c}, + {0x40d, 0x45d}, + {0x40e, 0x45e}, + {0x40f, 0x45f}, + {0x410, 0x430}, + {0x411, 0x431}, + {0x412, 0x432}, + {0x413, 0x433}, + {0x414, 0x434}, + {0x415, 0x435}, + {0x416, 0x436}, + {0x417, 0x437}, + {0x418, 0x438}, + {0x419, 0x439}, + {0x41a, 0x43a}, + {0x41b, 0x43b}, + {0x41c, 0x43c}, + {0x41d, 0x43d}, + {0x41e, 0x43e}, + {0x41f, 0x43f}, + {0x420, 0x440}, + {0x421, 0x441}, + {0x422, 0x442}, + {0x423, 0x443}, + {0x424, 0x444}, + {0x425, 0x445}, + {0x426, 0x446}, + {0x427, 0x447}, + {0x428, 0x448}, + {0x429, 0x449}, + {0x42a, 0x44a}, + {0x42b, 0x44b}, + {0x42c, 0x44c}, + {0x42d, 0x44d}, + {0x42e, 0x44e}, + {0x42f, 0x44f}, + {0x430, 0x410}, + {0x431, 0x411}, + {0x432, 0x412}, + {0x433, 0x413}, + {0x434, 0x414}, + {0x435, 0x415}, + {0x436, 0x416}, + {0x437, 0x417}, + {0x438, 0x418}, + {0x439, 0x419}, + {0x43a, 0x41a}, + {0x43b, 0x41b}, + {0x43c, 0x41c}, + {0x43d, 0x41d}, + {0x43e, 0x41e}, + {0x43f, 0x41f}, + {0x440, 0x420}, + {0x441, 0x421}, + {0x442, 0x422}, + {0x443, 0x423}, + {0x444, 0x424}, + {0x445, 0x425}, + {0x446, 0x426}, + {0x447, 0x427}, + {0x448, 0x428}, + {0x449, 0x429}, + {0x44a, 0x42a}, + {0x44b, 0x42b}, + {0x44c, 0x42c}, + {0x44d, 0x42d}, + {0x44e, 0x42e}, + {0x44f, 0x42f}, + {0x450, 0x400}, + {0x451, 0x401}, + {0x452, 0x402}, + {0x453, 0x403}, + {0x454, 0x404}, + {0x455, 0x405}, + {0x456, 0x406}, + {0x457, 0x407}, + {0x458, 0x408}, + {0x459, 0x409}, + {0x45a, 0x40a}, + {0x45b, 0x40b}, + {0x45c, 0x40c}, + {0x45d, 0x40d}, + {0x45e, 0x40e}, + {0x45f, 0x40f}, + {0x460, 0x461}, + {0x461, 0x460}, + {0x462, 0x463}, + {0x463, 0x462}, + {0x464, 0x465}, + {0x465, 0x464}, + {0x466, 0x467}, + {0x467, 0x466}, + {0x468, 0x469}, + {0x469, 0x468}, + {0x46a, 0x46b}, + {0x46b, 0x46a}, + {0x46c, 0x46d}, + {0x46d, 0x46c}, + {0x46e, 0x46f}, + {0x46f, 0x46e}, + {0x470, 0x471}, + {0x471, 0x470}, + {0x472, 0x473}, + {0x473, 0x472}, + {0x474, 0x475}, + {0x475, 0x474}, + {0x476, 0x477}, + {0x477, 0x476}, + {0x478, 0x479}, + {0x479, 0x478}, + {0x47a, 0x47b}, + {0x47b, 0x47a}, + {0x47c, 0x47d}, + {0x47d, 0x47c}, + {0x47e, 0x47f}, + {0x47f, 0x47e}, + {0x480, 0x481}, + {0x481, 0x480}, + {0x48a, 0x48b}, + {0x48b, 0x48a}, + {0x48c, 0x48d}, + {0x48d, 0x48c}, + {0x48e, 0x48f}, + {0x48f, 0x48e}, + {0x490, 0x491}, + {0x491, 0x490}, + {0x492, 0x493}, + {0x493, 0x492}, + {0x494, 0x495}, + {0x495, 0x494}, + {0x496, 0x497}, + {0x497, 0x496}, + {0x498, 0x499}, + {0x499, 0x498}, + {0x49a, 0x49b}, + {0x49b, 0x49a}, + {0x49c, 0x49d}, + {0x49d, 0x49c}, + {0x49e, 0x49f}, + {0x49f, 0x49e}, + {0x4a0, 0x4a1}, + {0x4a1, 0x4a0}, + {0x4a2, 0x4a3}, + {0x4a3, 0x4a2}, + {0x4a4, 0x4a5}, + {0x4a5, 0x4a4}, + {0x4a6, 0x4a7}, + {0x4a7, 0x4a6}, + {0x4a8, 0x4a9}, + {0x4a9, 0x4a8}, + {0x4aa, 0x4ab}, + {0x4ab, 0x4aa}, + {0x4ac, 0x4ad}, + {0x4ad, 0x4ac}, + {0x4ae, 0x4af}, + {0x4af, 0x4ae}, + {0x4b0, 0x4b1}, + {0x4b1, 0x4b0}, + {0x4b2, 0x4b3}, + {0x4b3, 0x4b2}, + {0x4b4, 0x4b5}, + {0x4b5, 0x4b4}, + {0x4b6, 0x4b7}, + {0x4b7, 0x4b6}, + {0x4b8, 0x4b9}, + {0x4b9, 0x4b8}, + {0x4ba, 0x4bb}, + {0x4bb, 0x4ba}, + {0x4bc, 0x4bd}, + {0x4bd, 0x4bc}, + {0x4be, 0x4bf}, + {0x4bf, 0x4be}, + {0x4c0, 0x4cf}, + {0x4c1, 0x4c2}, + {0x4c2, 0x4c1}, + {0x4c3, 0x4c4}, + {0x4c4, 0x4c3}, + {0x4c5, 0x4c6}, + {0x4c6, 0x4c5}, + {0x4c7, 0x4c8}, + {0x4c8, 0x4c7}, + {0x4c9, 0x4ca}, + {0x4ca, 0x4c9}, + {0x4cb, 0x4cc}, + {0x4cc, 0x4cb}, + {0x4cd, 0x4ce}, + {0x4ce, 0x4cd}, + {0x4cf, 0x4c0}, + {0x4d0, 0x4d1}, + {0x4d1, 0x4d0}, + {0x4d2, 0x4d3}, + {0x4d3, 0x4d2}, + {0x4d4, 0x4d5}, + {0x4d5, 0x4d4}, + {0x4d6, 0x4d7}, + {0x4d7, 0x4d6}, + {0x4d8, 0x4d9}, + {0x4d9, 0x4d8}, + {0x4da, 0x4db}, + {0x4db, 0x4da}, + {0x4dc, 0x4dd}, + {0x4dd, 0x4dc}, + {0x4de, 0x4df}, + {0x4df, 0x4de}, + {0x4e0, 0x4e1}, + {0x4e1, 0x4e0}, + {0x4e2, 0x4e3}, + {0x4e3, 0x4e2}, + {0x4e4, 0x4e5}, + {0x4e5, 0x4e4}, + {0x4e6, 0x4e7}, + {0x4e7, 0x4e6}, + {0x4e8, 0x4e9}, + {0x4e9, 0x4e8}, + {0x4ea, 0x4eb}, + {0x4eb, 0x4ea}, + {0x4ec, 0x4ed}, + {0x4ed, 0x4ec}, + {0x4ee, 0x4ef}, + {0x4ef, 0x4ee}, + {0x4f0, 0x4f1}, + {0x4f1, 0x4f0}, + {0x4f2, 0x4f3}, + {0x4f3, 0x4f2}, + {0x4f4, 0x4f5}, + {0x4f5, 0x4f4}, + {0x4f6, 0x4f7}, + {0x4f7, 0x4f6}, + {0x4f8, 0x4f9}, + {0x4f9, 0x4f8}, + {0x4fa, 0x4fb}, + {0x4fb, 0x4fa}, + {0x4fc, 0x4fd}, + {0x4fd, 0x4fc}, + {0x4fe, 0x4ff}, + {0x4ff, 0x4fe}, + {0x500, 0x501}, + {0x501, 0x500}, + {0x502, 0x503}, + {0x503, 0x502}, + {0x504, 0x505}, + {0x505, 0x504}, + {0x506, 0x507}, + {0x507, 0x506}, + {0x508, 0x509}, + {0x509, 0x508}, + {0x50a, 0x50b}, + {0x50b, 0x50a}, + {0x50c, 0x50d}, + {0x50d, 0x50c}, + {0x50e, 0x50f}, + {0x50f, 0x50e}, + {0x510, 0x511}, + {0x511, 0x510}, + {0x512, 0x513}, + {0x513, 0x512}, + {0x514, 0x515}, + {0x515, 0x514}, + {0x516, 0x517}, + {0x517, 0x516}, + {0x518, 0x519}, + {0x519, 0x518}, + {0x51a, 0x51b}, + {0x51b, 0x51a}, + {0x51c, 0x51d}, + {0x51d, 0x51c}, + {0x51e, 0x51f}, + {0x51f, 0x51e}, + {0x520, 0x521}, + {0x521, 0x520}, + {0x522, 0x523}, + {0x523, 0x522}, + {0x524, 0x525}, + {0x525, 0x524}, + {0x526, 0x527}, + {0x527, 0x526}, + {0x528, 0x529}, + {0x529, 0x528}, + {0x52a, 0x52b}, + {0x52b, 0x52a}, + {0x52c, 0x52d}, + {0x52d, 0x52c}, + {0x52e, 0x52f}, + {0x52f, 0x52e}, + {0x531, 0x561}, + {0x532, 0x562}, + {0x533, 0x563}, + {0x534, 0x564}, + {0x535, 0x565}, + {0x536, 0x566}, + {0x537, 0x567}, + {0x538, 0x568}, + {0x539, 0x569}, + {0x53a, 0x56a}, + {0x53b, 0x56b}, + {0x53c, 0x56c}, + {0x53d, 0x56d}, + {0x53e, 0x56e}, + {0x53f, 0x56f}, + {0x540, 0x570}, + {0x541, 0x571}, + {0x542, 0x572}, + {0x543, 0x573}, + {0x544, 0x574}, + {0x545, 0x575}, + {0x546, 0x576}, + {0x547, 0x577}, + {0x548, 0x578}, + {0x549, 0x579}, + {0x54a, 0x57a}, + {0x54b, 0x57b}, + {0x54c, 0x57c}, + {0x54d, 0x57d}, + {0x54e, 0x57e}, + {0x54f, 0x57f}, + {0x550, 0x580}, + {0x551, 0x581}, + {0x552, 0x582}, + {0x553, 0x583}, + {0x554, 0x584}, + {0x555, 0x585}, + {0x556, 0x586}, + {0x561, 0x531}, + {0x562, 0x532}, + {0x563, 0x533}, + {0x564, 0x534}, + {0x565, 0x535}, + {0x566, 0x536}, + {0x567, 0x537}, + {0x568, 0x538}, + {0x569, 0x539}, + {0x56a, 0x53a}, + {0x56b, 0x53b}, + {0x56c, 0x53c}, + {0x56d, 0x53d}, + {0x56e, 0x53e}, + {0x56f, 0x53f}, + {0x570, 0x540}, + {0x571, 0x541}, + {0x572, 0x542}, + {0x573, 0x543}, + {0x574, 0x544}, + {0x575, 0x545}, + {0x576, 0x546}, + {0x577, 0x547}, + {0x578, 0x548}, + {0x579, 0x549}, + {0x57a, 0x54a}, + {0x57b, 0x54b}, + {0x57c, 0x54c}, + {0x57d, 0x54d}, + {0x57e, 0x54e}, + {0x57f, 0x54f}, + {0x580, 0x550}, + {0x581, 0x551}, + {0x582, 0x552}, + {0x583, 0x553}, + {0x584, 0x554}, + {0x585, 0x555}, + {0x586, 0x556}, + {0x10a0, 0x2d00}, + {0x10a1, 0x2d01}, + {0x10a2, 0x2d02}, + {0x10a3, 0x2d03}, + {0x10a4, 0x2d04}, + {0x10a5, 0x2d05}, + {0x10a6, 0x2d06}, + {0x10a7, 0x2d07}, + {0x10a8, 0x2d08}, + {0x10a9, 0x2d09}, + {0x10aa, 0x2d0a}, + {0x10ab, 0x2d0b}, + {0x10ac, 0x2d0c}, + {0x10ad, 0x2d0d}, + {0x10ae, 0x2d0e}, + {0x10af, 0x2d0f}, + {0x10b0, 0x2d10}, + {0x10b1, 0x2d11}, + {0x10b2, 0x2d12}, + {0x10b3, 0x2d13}, + {0x10b4, 0x2d14}, + {0x10b5, 0x2d15}, + {0x10b6, 0x2d16}, + {0x10b7, 0x2d17}, + {0x10b8, 0x2d18}, + {0x10b9, 0x2d19}, + {0x10ba, 0x2d1a}, + {0x10bb, 0x2d1b}, + {0x10bc, 0x2d1c}, + {0x10bd, 0x2d1d}, + {0x10be, 0x2d1e}, + {0x10bf, 0x2d1f}, + {0x10c0, 0x2d20}, + {0x10c1, 0x2d21}, + {0x10c2, 0x2d22}, + {0x10c3, 0x2d23}, + {0x10c4, 0x2d24}, + {0x10c5, 0x2d25}, + {0x10c7, 0x2d27}, + {0x10cd, 0x2d2d}, + {0x1d79, 0xa77d}, + {0x1d7d, 0x2c63}, + {0x1e00, 0x1e01}, + {0x1e01, 0x1e00}, + {0x1e02, 0x1e03}, + {0x1e03, 0x1e02}, + {0x1e04, 0x1e05}, + {0x1e05, 0x1e04}, + {0x1e06, 0x1e07}, + {0x1e07, 0x1e06}, + {0x1e08, 0x1e09}, + {0x1e09, 0x1e08}, + {0x1e0a, 0x1e0b}, + {0x1e0b, 0x1e0a}, + {0x1e0c, 0x1e0d}, + {0x1e0d, 0x1e0c}, + {0x1e0e, 0x1e0f}, + {0x1e0f, 0x1e0e}, + {0x1e10, 0x1e11}, + {0x1e11, 0x1e10}, + {0x1e12, 0x1e13}, + {0x1e13, 0x1e12}, + {0x1e14, 0x1e15}, + {0x1e15, 0x1e14}, + {0x1e16, 0x1e17}, + {0x1e17, 0x1e16}, + {0x1e18, 0x1e19}, + {0x1e19, 0x1e18}, + {0x1e1a, 0x1e1b}, + {0x1e1b, 0x1e1a}, + {0x1e1c, 0x1e1d}, + {0x1e1d, 0x1e1c}, + {0x1e1e, 0x1e1f}, + {0x1e1f, 0x1e1e}, + {0x1e20, 0x1e21}, + {0x1e21, 0x1e20}, + {0x1e22, 0x1e23}, + {0x1e23, 0x1e22}, + {0x1e24, 0x1e25}, + {0x1e25, 0x1e24}, + {0x1e26, 0x1e27}, + {0x1e27, 0x1e26}, + {0x1e28, 0x1e29}, + {0x1e29, 0x1e28}, + {0x1e2a, 0x1e2b}, + {0x1e2b, 0x1e2a}, + {0x1e2c, 0x1e2d}, + {0x1e2d, 0x1e2c}, + {0x1e2e, 0x1e2f}, + {0x1e2f, 0x1e2e}, + {0x1e30, 0x1e31}, + {0x1e31, 0x1e30}, + {0x1e32, 0x1e33}, + {0x1e33, 0x1e32}, + {0x1e34, 0x1e35}, + {0x1e35, 0x1e34}, + {0x1e36, 0x1e37}, + {0x1e37, 0x1e36}, + {0x1e38, 0x1e39}, + {0x1e39, 0x1e38}, + {0x1e3a, 0x1e3b}, + {0x1e3b, 0x1e3a}, + {0x1e3c, 0x1e3d}, + {0x1e3d, 0x1e3c}, + {0x1e3e, 0x1e3f}, + {0x1e3f, 0x1e3e}, + {0x1e40, 0x1e41}, + {0x1e41, 0x1e40}, + {0x1e42, 0x1e43}, + {0x1e43, 0x1e42}, + {0x1e44, 0x1e45}, + {0x1e45, 0x1e44}, + {0x1e46, 0x1e47}, + {0x1e47, 0x1e46}, + {0x1e48, 0x1e49}, + {0x1e49, 0x1e48}, + {0x1e4a, 0x1e4b}, + {0x1e4b, 0x1e4a}, + {0x1e4c, 0x1e4d}, + {0x1e4d, 0x1e4c}, + {0x1e4e, 0x1e4f}, + {0x1e4f, 0x1e4e}, + {0x1e50, 0x1e51}, + {0x1e51, 0x1e50}, + {0x1e52, 0x1e53}, + {0x1e53, 0x1e52}, + {0x1e54, 0x1e55}, + {0x1e55, 0x1e54}, + {0x1e56, 0x1e57}, + {0x1e57, 0x1e56}, + {0x1e58, 0x1e59}, + {0x1e59, 0x1e58}, + {0x1e5a, 0x1e5b}, + {0x1e5b, 0x1e5a}, + {0x1e5c, 0x1e5d}, + {0x1e5d, 0x1e5c}, + {0x1e5e, 0x1e5f}, + {0x1e5f, 0x1e5e}, + {0x1e60, 0x1e61}, + {0x1e60, 0x1e9b}, + {0x1e61, 0x1e60}, + {0x1e61, 0x1e9b}, + {0x1e62, 0x1e63}, + {0x1e63, 0x1e62}, + {0x1e64, 0x1e65}, + {0x1e65, 0x1e64}, + {0x1e66, 0x1e67}, + {0x1e67, 0x1e66}, + {0x1e68, 0x1e69}, + {0x1e69, 0x1e68}, + {0x1e6a, 0x1e6b}, + {0x1e6b, 0x1e6a}, + {0x1e6c, 0x1e6d}, + {0x1e6d, 0x1e6c}, + {0x1e6e, 0x1e6f}, + {0x1e6f, 0x1e6e}, + {0x1e70, 0x1e71}, + {0x1e71, 0x1e70}, + {0x1e72, 0x1e73}, + {0x1e73, 0x1e72}, + {0x1e74, 0x1e75}, + {0x1e75, 0x1e74}, + {0x1e76, 0x1e77}, + {0x1e77, 0x1e76}, + {0x1e78, 0x1e79}, + {0x1e79, 0x1e78}, + {0x1e7a, 0x1e7b}, + {0x1e7b, 0x1e7a}, + {0x1e7c, 0x1e7d}, + {0x1e7d, 0x1e7c}, + {0x1e7e, 0x1e7f}, + {0x1e7f, 0x1e7e}, + {0x1e80, 0x1e81}, + {0x1e81, 0x1e80}, + {0x1e82, 0x1e83}, + {0x1e83, 0x1e82}, + {0x1e84, 0x1e85}, + {0x1e85, 0x1e84}, + {0x1e86, 0x1e87}, + {0x1e87, 0x1e86}, + {0x1e88, 0x1e89}, + {0x1e89, 0x1e88}, + {0x1e8a, 0x1e8b}, + {0x1e8b, 0x1e8a}, + {0x1e8c, 0x1e8d}, + {0x1e8d, 0x1e8c}, + {0x1e8e, 0x1e8f}, + {0x1e8f, 0x1e8e}, + {0x1e90, 0x1e91}, + {0x1e91, 0x1e90}, + {0x1e92, 0x1e93}, + {0x1e93, 0x1e92}, + {0x1e94, 0x1e95}, + {0x1e95, 0x1e94}, + {0x1e9b, 0x1e60}, + {0x1e9b, 0x1e61}, + {0x1e9e, 0xdf}, + {0x1ea0, 0x1ea1}, + {0x1ea1, 0x1ea0}, + {0x1ea2, 0x1ea3}, + {0x1ea3, 0x1ea2}, + {0x1ea4, 0x1ea5}, + {0x1ea5, 0x1ea4}, + {0x1ea6, 0x1ea7}, + {0x1ea7, 0x1ea6}, + {0x1ea8, 0x1ea9}, + {0x1ea9, 0x1ea8}, + {0x1eaa, 0x1eab}, + {0x1eab, 0x1eaa}, + {0x1eac, 0x1ead}, + {0x1ead, 0x1eac}, + {0x1eae, 0x1eaf}, + {0x1eaf, 0x1eae}, + {0x1eb0, 0x1eb1}, + {0x1eb1, 0x1eb0}, + {0x1eb2, 0x1eb3}, + {0x1eb3, 0x1eb2}, + {0x1eb4, 0x1eb5}, + {0x1eb5, 0x1eb4}, + {0x1eb6, 0x1eb7}, + {0x1eb7, 0x1eb6}, + {0x1eb8, 0x1eb9}, + {0x1eb9, 0x1eb8}, + {0x1eba, 0x1ebb}, + {0x1ebb, 0x1eba}, + {0x1ebc, 0x1ebd}, + {0x1ebd, 0x1ebc}, + {0x1ebe, 0x1ebf}, + {0x1ebf, 0x1ebe}, + {0x1ec0, 0x1ec1}, + {0x1ec1, 0x1ec0}, + {0x1ec2, 0x1ec3}, + {0x1ec3, 0x1ec2}, + {0x1ec4, 0x1ec5}, + {0x1ec5, 0x1ec4}, + {0x1ec6, 0x1ec7}, + {0x1ec7, 0x1ec6}, + {0x1ec8, 0x1ec9}, + {0x1ec9, 0x1ec8}, + {0x1eca, 0x1ecb}, + {0x1ecb, 0x1eca}, + {0x1ecc, 0x1ecd}, + {0x1ecd, 0x1ecc}, + {0x1ece, 0x1ecf}, + {0x1ecf, 0x1ece}, + {0x1ed0, 0x1ed1}, + {0x1ed1, 0x1ed0}, + {0x1ed2, 0x1ed3}, + {0x1ed3, 0x1ed2}, + {0x1ed4, 0x1ed5}, + {0x1ed5, 0x1ed4}, + {0x1ed6, 0x1ed7}, + {0x1ed7, 0x1ed6}, + {0x1ed8, 0x1ed9}, + {0x1ed9, 0x1ed8}, + {0x1eda, 0x1edb}, + {0x1edb, 0x1eda}, + {0x1edc, 0x1edd}, + {0x1edd, 0x1edc}, + {0x1ede, 0x1edf}, + {0x1edf, 0x1ede}, + {0x1ee0, 0x1ee1}, + {0x1ee1, 0x1ee0}, + {0x1ee2, 0x1ee3}, + {0x1ee3, 0x1ee2}, + {0x1ee4, 0x1ee5}, + {0x1ee5, 0x1ee4}, + {0x1ee6, 0x1ee7}, + {0x1ee7, 0x1ee6}, + {0x1ee8, 0x1ee9}, + {0x1ee9, 0x1ee8}, + {0x1eea, 0x1eeb}, + {0x1eeb, 0x1eea}, + {0x1eec, 0x1eed}, + {0x1eed, 0x1eec}, + {0x1eee, 0x1eef}, + {0x1eef, 0x1eee}, + {0x1ef0, 0x1ef1}, + {0x1ef1, 0x1ef0}, + {0x1ef2, 0x1ef3}, + {0x1ef3, 0x1ef2}, + {0x1ef4, 0x1ef5}, + {0x1ef5, 0x1ef4}, + {0x1ef6, 0x1ef7}, + {0x1ef7, 0x1ef6}, + {0x1ef8, 0x1ef9}, + {0x1ef9, 0x1ef8}, + {0x1efa, 0x1efb}, + {0x1efb, 0x1efa}, + {0x1efc, 0x1efd}, + {0x1efd, 0x1efc}, + {0x1efe, 0x1eff}, + {0x1eff, 0x1efe}, + {0x1f00, 0x1f08}, + {0x1f01, 0x1f09}, + {0x1f02, 0x1f0a}, + {0x1f03, 0x1f0b}, + {0x1f04, 0x1f0c}, + {0x1f05, 0x1f0d}, + {0x1f06, 0x1f0e}, + {0x1f07, 0x1f0f}, + {0x1f08, 0x1f00}, + {0x1f09, 0x1f01}, + {0x1f0a, 0x1f02}, + {0x1f0b, 0x1f03}, + {0x1f0c, 0x1f04}, + {0x1f0d, 0x1f05}, + {0x1f0e, 0x1f06}, + {0x1f0f, 0x1f07}, + {0x1f10, 0x1f18}, + {0x1f11, 0x1f19}, + {0x1f12, 0x1f1a}, + {0x1f13, 0x1f1b}, + {0x1f14, 0x1f1c}, + {0x1f15, 0x1f1d}, + {0x1f18, 0x1f10}, + {0x1f19, 0x1f11}, + {0x1f1a, 0x1f12}, + {0x1f1b, 0x1f13}, + {0x1f1c, 0x1f14}, + {0x1f1d, 0x1f15}, + {0x1f20, 0x1f28}, + {0x1f21, 0x1f29}, + {0x1f22, 0x1f2a}, + {0x1f23, 0x1f2b}, + {0x1f24, 0x1f2c}, + {0x1f25, 0x1f2d}, + {0x1f26, 0x1f2e}, + {0x1f27, 0x1f2f}, + {0x1f28, 0x1f20}, + {0x1f29, 0x1f21}, + {0x1f2a, 0x1f22}, + {0x1f2b, 0x1f23}, + {0x1f2c, 0x1f24}, + {0x1f2d, 0x1f25}, + {0x1f2e, 0x1f26}, + {0x1f2f, 0x1f27}, + {0x1f30, 0x1f38}, + {0x1f31, 0x1f39}, + {0x1f32, 0x1f3a}, + {0x1f33, 0x1f3b}, + {0x1f34, 0x1f3c}, + {0x1f35, 0x1f3d}, + {0x1f36, 0x1f3e}, + {0x1f37, 0x1f3f}, + {0x1f38, 0x1f30}, + {0x1f39, 0x1f31}, + {0x1f3a, 0x1f32}, + {0x1f3b, 0x1f33}, + {0x1f3c, 0x1f34}, + {0x1f3d, 0x1f35}, + {0x1f3e, 0x1f36}, + {0x1f3f, 0x1f37}, + {0x1f40, 0x1f48}, + {0x1f41, 0x1f49}, + {0x1f42, 0x1f4a}, + {0x1f43, 0x1f4b}, + {0x1f44, 0x1f4c}, + {0x1f45, 0x1f4d}, + {0x1f48, 0x1f40}, + {0x1f49, 0x1f41}, + {0x1f4a, 0x1f42}, + {0x1f4b, 0x1f43}, + {0x1f4c, 0x1f44}, + {0x1f4d, 0x1f45}, + {0x1f51, 0x1f59}, + {0x1f53, 0x1f5b}, + {0x1f55, 0x1f5d}, + {0x1f57, 0x1f5f}, + {0x1f59, 0x1f51}, + {0x1f5b, 0x1f53}, + {0x1f5d, 0x1f55}, + {0x1f5f, 0x1f57}, + {0x1f60, 0x1f68}, + {0x1f61, 0x1f69}, + {0x1f62, 0x1f6a}, + {0x1f63, 0x1f6b}, + {0x1f64, 0x1f6c}, + {0x1f65, 0x1f6d}, + {0x1f66, 0x1f6e}, + {0x1f67, 0x1f6f}, + {0x1f68, 0x1f60}, + {0x1f69, 0x1f61}, + {0x1f6a, 0x1f62}, + {0x1f6b, 0x1f63}, + {0x1f6c, 0x1f64}, + {0x1f6d, 0x1f65}, + {0x1f6e, 0x1f66}, + {0x1f6f, 0x1f67}, + {0x1f70, 0x1fba}, + {0x1f71, 0x1fbb}, + {0x1f72, 0x1fc8}, + {0x1f73, 0x1fc9}, + {0x1f74, 0x1fca}, + {0x1f75, 0x1fcb}, + {0x1f76, 0x1fda}, + {0x1f77, 0x1fdb}, + {0x1f78, 0x1ff8}, + {0x1f79, 0x1ff9}, + {0x1f7a, 0x1fea}, + {0x1f7b, 0x1feb}, + {0x1f7c, 0x1ffa}, + {0x1f7d, 0x1ffb}, + {0x1f80, 0x1f88}, + {0x1f81, 0x1f89}, + {0x1f82, 0x1f8a}, + {0x1f83, 0x1f8b}, + {0x1f84, 0x1f8c}, + {0x1f85, 0x1f8d}, + {0x1f86, 0x1f8e}, + {0x1f87, 0x1f8f}, + {0x1f88, 0x1f80}, + {0x1f89, 0x1f81}, + {0x1f8a, 0x1f82}, + {0x1f8b, 0x1f83}, + {0x1f8c, 0x1f84}, + {0x1f8d, 0x1f85}, + {0x1f8e, 0x1f86}, + {0x1f8f, 0x1f87}, + {0x1f90, 0x1f98}, + {0x1f91, 0x1f99}, + {0x1f92, 0x1f9a}, + {0x1f93, 0x1f9b}, + {0x1f94, 0x1f9c}, + {0x1f95, 0x1f9d}, + {0x1f96, 0x1f9e}, + {0x1f97, 0x1f9f}, + {0x1f98, 0x1f90}, + {0x1f99, 0x1f91}, + {0x1f9a, 0x1f92}, + {0x1f9b, 0x1f93}, + {0x1f9c, 0x1f94}, + {0x1f9d, 0x1f95}, + {0x1f9e, 0x1f96}, + {0x1f9f, 0x1f97}, + {0x1fa0, 0x1fa8}, + {0x1fa1, 0x1fa9}, + {0x1fa2, 0x1faa}, + {0x1fa3, 0x1fab}, + {0x1fa4, 0x1fac}, + {0x1fa5, 0x1fad}, + {0x1fa6, 0x1fae}, + {0x1fa7, 0x1faf}, + {0x1fa8, 0x1fa0}, + {0x1fa9, 0x1fa1}, + {0x1faa, 0x1fa2}, + {0x1fab, 0x1fa3}, + {0x1fac, 0x1fa4}, + {0x1fad, 0x1fa5}, + {0x1fae, 0x1fa6}, + {0x1faf, 0x1fa7}, + {0x1fb0, 0x1fb8}, + {0x1fb1, 0x1fb9}, + {0x1fb3, 0x1fbc}, + {0x1fb8, 0x1fb0}, + {0x1fb9, 0x1fb1}, + {0x1fba, 0x1f70}, + {0x1fbb, 0x1f71}, + {0x1fbc, 0x1fb3}, + {0x1fbe, 0x345}, + {0x1fbe, 0x399}, + {0x1fbe, 0x3b9}, + {0x1fc3, 0x1fcc}, + {0x1fc8, 0x1f72}, + {0x1fc9, 0x1f73}, + {0x1fca, 0x1f74}, + {0x1fcb, 0x1f75}, + {0x1fcc, 0x1fc3}, + {0x1fd0, 0x1fd8}, + {0x1fd1, 0x1fd9}, + {0x1fd8, 0x1fd0}, + {0x1fd9, 0x1fd1}, + {0x1fda, 0x1f76}, + {0x1fdb, 0x1f77}, + {0x1fe0, 0x1fe8}, + {0x1fe1, 0x1fe9}, + {0x1fe5, 0x1fec}, + {0x1fe8, 0x1fe0}, + {0x1fe9, 0x1fe1}, + {0x1fea, 0x1f7a}, + {0x1feb, 0x1f7b}, + {0x1fec, 0x1fe5}, + {0x1ff3, 0x1ffc}, + {0x1ff8, 0x1f78}, + {0x1ff9, 0x1f79}, + {0x1ffa, 0x1f7c}, + {0x1ffb, 0x1f7d}, + {0x1ffc, 0x1ff3}, + {0x2126, 0x3a9}, + {0x2126, 0x3c9}, + {0x212a, 0x4b}, + {0x212a, 0x6b}, + {0x212b, 0xc5}, + {0x212b, 0xe5}, + {0x2132, 0x214e}, + {0x214e, 0x2132}, + {0x2160, 0x2170}, + {0x2161, 0x2171}, + {0x2162, 0x2172}, + {0x2163, 0x2173}, + {0x2164, 0x2174}, + {0x2165, 0x2175}, + {0x2166, 0x2176}, + {0x2167, 0x2177}, + {0x2168, 0x2178}, + {0x2169, 0x2179}, + {0x216a, 0x217a}, + {0x216b, 0x217b}, + {0x216c, 0x217c}, + {0x216d, 0x217d}, + {0x216e, 0x217e}, + {0x216f, 0x217f}, + {0x2170, 0x2160}, + {0x2171, 0x2161}, + {0x2172, 0x2162}, + {0x2173, 0x2163}, + {0x2174, 0x2164}, + {0x2175, 0x2165}, + {0x2176, 0x2166}, + {0x2177, 0x2167}, + {0x2178, 0x2168}, + {0x2179, 0x2169}, + {0x217a, 0x216a}, + {0x217b, 0x216b}, + {0x217c, 0x216c}, + {0x217d, 0x216d}, + {0x217e, 0x216e}, + {0x217f, 0x216f}, + {0x2183, 0x2184}, + {0x2184, 0x2183}, + {0x24b6, 0x24d0}, + {0x24b7, 0x24d1}, + {0x24b8, 0x24d2}, + {0x24b9, 0x24d3}, + {0x24ba, 0x24d4}, + {0x24bb, 0x24d5}, + {0x24bc, 0x24d6}, + {0x24bd, 0x24d7}, + {0x24be, 0x24d8}, + {0x24bf, 0x24d9}, + {0x24c0, 0x24da}, + {0x24c1, 0x24db}, + {0x24c2, 0x24dc}, + {0x24c3, 0x24dd}, + {0x24c4, 0x24de}, + {0x24c5, 0x24df}, + {0x24c6, 0x24e0}, + {0x24c7, 0x24e1}, + {0x24c8, 0x24e2}, + {0x24c9, 0x24e3}, + {0x24ca, 0x24e4}, + {0x24cb, 0x24e5}, + {0x24cc, 0x24e6}, + {0x24cd, 0x24e7}, + {0x24ce, 0x24e8}, + {0x24cf, 0x24e9}, + {0x24d0, 0x24b6}, + {0x24d1, 0x24b7}, + {0x24d2, 0x24b8}, + {0x24d3, 0x24b9}, + {0x24d4, 0x24ba}, + {0x24d5, 0x24bb}, + {0x24d6, 0x24bc}, + {0x24d7, 0x24bd}, + {0x24d8, 0x24be}, + {0x24d9, 0x24bf}, + {0x24da, 0x24c0}, + {0x24db, 0x24c1}, + {0x24dc, 0x24c2}, + {0x24dd, 0x24c3}, + {0x24de, 0x24c4}, + {0x24df, 0x24c5}, + {0x24e0, 0x24c6}, + {0x24e1, 0x24c7}, + {0x24e2, 0x24c8}, + {0x24e3, 0x24c9}, + {0x24e4, 0x24ca}, + {0x24e5, 0x24cb}, + {0x24e6, 0x24cc}, + {0x24e7, 0x24cd}, + {0x24e8, 0x24ce}, + {0x24e9, 0x24cf}, + {0x2c00, 0x2c30}, + {0x2c01, 0x2c31}, + {0x2c02, 0x2c32}, + {0x2c03, 0x2c33}, + {0x2c04, 0x2c34}, + {0x2c05, 0x2c35}, + {0x2c06, 0x2c36}, + {0x2c07, 0x2c37}, + {0x2c08, 0x2c38}, + {0x2c09, 0x2c39}, + {0x2c0a, 0x2c3a}, + {0x2c0b, 0x2c3b}, + {0x2c0c, 0x2c3c}, + {0x2c0d, 0x2c3d}, + {0x2c0e, 0x2c3e}, + {0x2c0f, 0x2c3f}, + {0x2c10, 0x2c40}, + {0x2c11, 0x2c41}, + {0x2c12, 0x2c42}, + {0x2c13, 0x2c43}, + {0x2c14, 0x2c44}, + {0x2c15, 0x2c45}, + {0x2c16, 0x2c46}, + {0x2c17, 0x2c47}, + {0x2c18, 0x2c48}, + {0x2c19, 0x2c49}, + {0x2c1a, 0x2c4a}, + {0x2c1b, 0x2c4b}, + {0x2c1c, 0x2c4c}, + {0x2c1d, 0x2c4d}, + {0x2c1e, 0x2c4e}, + {0x2c1f, 0x2c4f}, + {0x2c20, 0x2c50}, + {0x2c21, 0x2c51}, + {0x2c22, 0x2c52}, + {0x2c23, 0x2c53}, + {0x2c24, 0x2c54}, + {0x2c25, 0x2c55}, + {0x2c26, 0x2c56}, + {0x2c27, 0x2c57}, + {0x2c28, 0x2c58}, + {0x2c29, 0x2c59}, + {0x2c2a, 0x2c5a}, + {0x2c2b, 0x2c5b}, + {0x2c2c, 0x2c5c}, + {0x2c2d, 0x2c5d}, + {0x2c2e, 0x2c5e}, + {0x2c30, 0x2c00}, + {0x2c31, 0x2c01}, + {0x2c32, 0x2c02}, + {0x2c33, 0x2c03}, + {0x2c34, 0x2c04}, + {0x2c35, 0x2c05}, + {0x2c36, 0x2c06}, + {0x2c37, 0x2c07}, + {0x2c38, 0x2c08}, + {0x2c39, 0x2c09}, + {0x2c3a, 0x2c0a}, + {0x2c3b, 0x2c0b}, + {0x2c3c, 0x2c0c}, + {0x2c3d, 0x2c0d}, + {0x2c3e, 0x2c0e}, + {0x2c3f, 0x2c0f}, + {0x2c40, 0x2c10}, + {0x2c41, 0x2c11}, + {0x2c42, 0x2c12}, + {0x2c43, 0x2c13}, + {0x2c44, 0x2c14}, + {0x2c45, 0x2c15}, + {0x2c46, 0x2c16}, + {0x2c47, 0x2c17}, + {0x2c48, 0x2c18}, + {0x2c49, 0x2c19}, + {0x2c4a, 0x2c1a}, + {0x2c4b, 0x2c1b}, + {0x2c4c, 0x2c1c}, + {0x2c4d, 0x2c1d}, + {0x2c4e, 0x2c1e}, + {0x2c4f, 0x2c1f}, + {0x2c50, 0x2c20}, + {0x2c51, 0x2c21}, + {0x2c52, 0x2c22}, + {0x2c53, 0x2c23}, + {0x2c54, 0x2c24}, + {0x2c55, 0x2c25}, + {0x2c56, 0x2c26}, + {0x2c57, 0x2c27}, + {0x2c58, 0x2c28}, + {0x2c59, 0x2c29}, + {0x2c5a, 0x2c2a}, + {0x2c5b, 0x2c2b}, + {0x2c5c, 0x2c2c}, + {0x2c5d, 0x2c2d}, + {0x2c5e, 0x2c2e}, + {0x2c60, 0x2c61}, + {0x2c61, 0x2c60}, + {0x2c62, 0x26b}, + {0x2c63, 0x1d7d}, + {0x2c64, 0x27d}, + {0x2c65, 0x23a}, + {0x2c66, 0x23e}, + {0x2c67, 0x2c68}, + {0x2c68, 0x2c67}, + {0x2c69, 0x2c6a}, + {0x2c6a, 0x2c69}, + {0x2c6b, 0x2c6c}, + {0x2c6c, 0x2c6b}, + {0x2c6d, 0x251}, + {0x2c6e, 0x271}, + {0x2c6f, 0x250}, + {0x2c70, 0x252}, + {0x2c72, 0x2c73}, + {0x2c73, 0x2c72}, + {0x2c75, 0x2c76}, + {0x2c76, 0x2c75}, + {0x2c7e, 0x23f}, + {0x2c7f, 0x240}, + {0x2c80, 0x2c81}, + {0x2c81, 0x2c80}, + {0x2c82, 0x2c83}, + {0x2c83, 0x2c82}, + {0x2c84, 0x2c85}, + {0x2c85, 0x2c84}, + {0x2c86, 0x2c87}, + {0x2c87, 0x2c86}, + {0x2c88, 0x2c89}, + {0x2c89, 0x2c88}, + {0x2c8a, 0x2c8b}, + {0x2c8b, 0x2c8a}, + {0x2c8c, 0x2c8d}, + {0x2c8d, 0x2c8c}, + {0x2c8e, 0x2c8f}, + {0x2c8f, 0x2c8e}, + {0x2c90, 0x2c91}, + {0x2c91, 0x2c90}, + {0x2c92, 0x2c93}, + {0x2c93, 0x2c92}, + {0x2c94, 0x2c95}, + {0x2c95, 0x2c94}, + {0x2c96, 0x2c97}, + {0x2c97, 0x2c96}, + {0x2c98, 0x2c99}, + {0x2c99, 0x2c98}, + {0x2c9a, 0x2c9b}, + {0x2c9b, 0x2c9a}, + {0x2c9c, 0x2c9d}, + {0x2c9d, 0x2c9c}, + {0x2c9e, 0x2c9f}, + {0x2c9f, 0x2c9e}, + {0x2ca0, 0x2ca1}, + {0x2ca1, 0x2ca0}, + {0x2ca2, 0x2ca3}, + {0x2ca3, 0x2ca2}, + {0x2ca4, 0x2ca5}, + {0x2ca5, 0x2ca4}, + {0x2ca6, 0x2ca7}, + {0x2ca7, 0x2ca6}, + {0x2ca8, 0x2ca9}, + {0x2ca9, 0x2ca8}, + {0x2caa, 0x2cab}, + {0x2cab, 0x2caa}, + {0x2cac, 0x2cad}, + {0x2cad, 0x2cac}, + {0x2cae, 0x2caf}, + {0x2caf, 0x2cae}, + {0x2cb0, 0x2cb1}, + {0x2cb1, 0x2cb0}, + {0x2cb2, 0x2cb3}, + {0x2cb3, 0x2cb2}, + {0x2cb4, 0x2cb5}, + {0x2cb5, 0x2cb4}, + {0x2cb6, 0x2cb7}, + {0x2cb7, 0x2cb6}, + {0x2cb8, 0x2cb9}, + {0x2cb9, 0x2cb8}, + {0x2cba, 0x2cbb}, + {0x2cbb, 0x2cba}, + {0x2cbc, 0x2cbd}, + {0x2cbd, 0x2cbc}, + {0x2cbe, 0x2cbf}, + {0x2cbf, 0x2cbe}, + {0x2cc0, 0x2cc1}, + {0x2cc1, 0x2cc0}, + {0x2cc2, 0x2cc3}, + {0x2cc3, 0x2cc2}, + {0x2cc4, 0x2cc5}, + {0x2cc5, 0x2cc4}, + {0x2cc6, 0x2cc7}, + {0x2cc7, 0x2cc6}, + {0x2cc8, 0x2cc9}, + {0x2cc9, 0x2cc8}, + {0x2cca, 0x2ccb}, + {0x2ccb, 0x2cca}, + {0x2ccc, 0x2ccd}, + {0x2ccd, 0x2ccc}, + {0x2cce, 0x2ccf}, + {0x2ccf, 0x2cce}, + {0x2cd0, 0x2cd1}, + {0x2cd1, 0x2cd0}, + {0x2cd2, 0x2cd3}, + {0x2cd3, 0x2cd2}, + {0x2cd4, 0x2cd5}, + {0x2cd5, 0x2cd4}, + {0x2cd6, 0x2cd7}, + {0x2cd7, 0x2cd6}, + {0x2cd8, 0x2cd9}, + {0x2cd9, 0x2cd8}, + {0x2cda, 0x2cdb}, + {0x2cdb, 0x2cda}, + {0x2cdc, 0x2cdd}, + {0x2cdd, 0x2cdc}, + {0x2cde, 0x2cdf}, + {0x2cdf, 0x2cde}, + {0x2ce0, 0x2ce1}, + {0x2ce1, 0x2ce0}, + {0x2ce2, 0x2ce3}, + {0x2ce3, 0x2ce2}, + {0x2ceb, 0x2cec}, + {0x2cec, 0x2ceb}, + {0x2ced, 0x2cee}, + {0x2cee, 0x2ced}, + {0x2cf2, 0x2cf3}, + {0x2cf3, 0x2cf2}, + {0x2d00, 0x10a0}, + {0x2d01, 0x10a1}, + {0x2d02, 0x10a2}, + {0x2d03, 0x10a3}, + {0x2d04, 0x10a4}, + {0x2d05, 0x10a5}, + {0x2d06, 0x10a6}, + {0x2d07, 0x10a7}, + {0x2d08, 0x10a8}, + {0x2d09, 0x10a9}, + {0x2d0a, 0x10aa}, + {0x2d0b, 0x10ab}, + {0x2d0c, 0x10ac}, + {0x2d0d, 0x10ad}, + {0x2d0e, 0x10ae}, + {0x2d0f, 0x10af}, + {0x2d10, 0x10b0}, + {0x2d11, 0x10b1}, + {0x2d12, 0x10b2}, + {0x2d13, 0x10b3}, + {0x2d14, 0x10b4}, + {0x2d15, 0x10b5}, + {0x2d16, 0x10b6}, + {0x2d17, 0x10b7}, + {0x2d18, 0x10b8}, + {0x2d19, 0x10b9}, + {0x2d1a, 0x10ba}, + {0x2d1b, 0x10bb}, + {0x2d1c, 0x10bc}, + {0x2d1d, 0x10bd}, + {0x2d1e, 0x10be}, + {0x2d1f, 0x10bf}, + {0x2d20, 0x10c0}, + {0x2d21, 0x10c1}, + {0x2d22, 0x10c2}, + {0x2d23, 0x10c3}, + {0x2d24, 0x10c4}, + {0x2d25, 0x10c5}, + {0x2d27, 0x10c7}, + {0x2d2d, 0x10cd}, + {0xa640, 0xa641}, + {0xa641, 0xa640}, + {0xa642, 0xa643}, + {0xa643, 0xa642}, + {0xa644, 0xa645}, + {0xa645, 0xa644}, + {0xa646, 0xa647}, + {0xa647, 0xa646}, + {0xa648, 0xa649}, + {0xa649, 0xa648}, + {0xa64a, 0xa64b}, + {0xa64b, 0xa64a}, + {0xa64c, 0xa64d}, + {0xa64d, 0xa64c}, + {0xa64e, 0xa64f}, + {0xa64f, 0xa64e}, + {0xa650, 0xa651}, + {0xa651, 0xa650}, + {0xa652, 0xa653}, + {0xa653, 0xa652}, + {0xa654, 0xa655}, + {0xa655, 0xa654}, + {0xa656, 0xa657}, + {0xa657, 0xa656}, + {0xa658, 0xa659}, + {0xa659, 0xa658}, + {0xa65a, 0xa65b}, + {0xa65b, 0xa65a}, + {0xa65c, 0xa65d}, + {0xa65d, 0xa65c}, + {0xa65e, 0xa65f}, + {0xa65f, 0xa65e}, + {0xa660, 0xa661}, + {0xa661, 0xa660}, + {0xa662, 0xa663}, + {0xa663, 0xa662}, + {0xa664, 0xa665}, + {0xa665, 0xa664}, + {0xa666, 0xa667}, + {0xa667, 0xa666}, + {0xa668, 0xa669}, + {0xa669, 0xa668}, + {0xa66a, 0xa66b}, + {0xa66b, 0xa66a}, + {0xa66c, 0xa66d}, + {0xa66d, 0xa66c}, + {0xa680, 0xa681}, + {0xa681, 0xa680}, + {0xa682, 0xa683}, + {0xa683, 0xa682}, + {0xa684, 0xa685}, + {0xa685, 0xa684}, + {0xa686, 0xa687}, + {0xa687, 0xa686}, + {0xa688, 0xa689}, + {0xa689, 0xa688}, + {0xa68a, 0xa68b}, + {0xa68b, 0xa68a}, + {0xa68c, 0xa68d}, + {0xa68d, 0xa68c}, + {0xa68e, 0xa68f}, + {0xa68f, 0xa68e}, + {0xa690, 0xa691}, + {0xa691, 0xa690}, + {0xa692, 0xa693}, + {0xa693, 0xa692}, + {0xa694, 0xa695}, + {0xa695, 0xa694}, + {0xa696, 0xa697}, + {0xa697, 0xa696}, + {0xa698, 0xa699}, + {0xa699, 0xa698}, + {0xa69a, 0xa69b}, + {0xa69b, 0xa69a}, + {0xa722, 0xa723}, + {0xa723, 0xa722}, + {0xa724, 0xa725}, + {0xa725, 0xa724}, + {0xa726, 0xa727}, + {0xa727, 0xa726}, + {0xa728, 0xa729}, + {0xa729, 0xa728}, + {0xa72a, 0xa72b}, + {0xa72b, 0xa72a}, + {0xa72c, 0xa72d}, + {0xa72d, 0xa72c}, + {0xa72e, 0xa72f}, + {0xa72f, 0xa72e}, + {0xa732, 0xa733}, + {0xa733, 0xa732}, + {0xa734, 0xa735}, + {0xa735, 0xa734}, + {0xa736, 0xa737}, + {0xa737, 0xa736}, + {0xa738, 0xa739}, + {0xa739, 0xa738}, + {0xa73a, 0xa73b}, + {0xa73b, 0xa73a}, + {0xa73c, 0xa73d}, + {0xa73d, 0xa73c}, + {0xa73e, 0xa73f}, + {0xa73f, 0xa73e}, + {0xa740, 0xa741}, + {0xa741, 0xa740}, + {0xa742, 0xa743}, + {0xa743, 0xa742}, + {0xa744, 0xa745}, + {0xa745, 0xa744}, + {0xa746, 0xa747}, + {0xa747, 0xa746}, + {0xa748, 0xa749}, + {0xa749, 0xa748}, + {0xa74a, 0xa74b}, + {0xa74b, 0xa74a}, + {0xa74c, 0xa74d}, + {0xa74d, 0xa74c}, + {0xa74e, 0xa74f}, + {0xa74f, 0xa74e}, + {0xa750, 0xa751}, + {0xa751, 0xa750}, + {0xa752, 0xa753}, + {0xa753, 0xa752}, + {0xa754, 0xa755}, + {0xa755, 0xa754}, + {0xa756, 0xa757}, + {0xa757, 0xa756}, + {0xa758, 0xa759}, + {0xa759, 0xa758}, + {0xa75a, 0xa75b}, + {0xa75b, 0xa75a}, + {0xa75c, 0xa75d}, + {0xa75d, 0xa75c}, + {0xa75e, 0xa75f}, + {0xa75f, 0xa75e}, + {0xa760, 0xa761}, + {0xa761, 0xa760}, + {0xa762, 0xa763}, + {0xa763, 0xa762}, + {0xa764, 0xa765}, + {0xa765, 0xa764}, + {0xa766, 0xa767}, + {0xa767, 0xa766}, + {0xa768, 0xa769}, + {0xa769, 0xa768}, + {0xa76a, 0xa76b}, + {0xa76b, 0xa76a}, + {0xa76c, 0xa76d}, + {0xa76d, 0xa76c}, + {0xa76e, 0xa76f}, + {0xa76f, 0xa76e}, + {0xa779, 0xa77a}, + {0xa77a, 0xa779}, + {0xa77b, 0xa77c}, + {0xa77c, 0xa77b}, + {0xa77d, 0x1d79}, + {0xa77e, 0xa77f}, + {0xa77f, 0xa77e}, + {0xa780, 0xa781}, + {0xa781, 0xa780}, + {0xa782, 0xa783}, + {0xa783, 0xa782}, + {0xa784, 0xa785}, + {0xa785, 0xa784}, + {0xa786, 0xa787}, + {0xa787, 0xa786}, + {0xa78b, 0xa78c}, + {0xa78c, 0xa78b}, + {0xa78d, 0x265}, + {0xa790, 0xa791}, + {0xa791, 0xa790}, + {0xa792, 0xa793}, + {0xa793, 0xa792}, + {0xa796, 0xa797}, + {0xa797, 0xa796}, + {0xa798, 0xa799}, + {0xa799, 0xa798}, + {0xa79a, 0xa79b}, + {0xa79b, 0xa79a}, + {0xa79c, 0xa79d}, + {0xa79d, 0xa79c}, + {0xa79e, 0xa79f}, + {0xa79f, 0xa79e}, + {0xa7a0, 0xa7a1}, + {0xa7a1, 0xa7a0}, + {0xa7a2, 0xa7a3}, + {0xa7a3, 0xa7a2}, + {0xa7a4, 0xa7a5}, + {0xa7a5, 0xa7a4}, + {0xa7a6, 0xa7a7}, + {0xa7a7, 0xa7a6}, + {0xa7a8, 0xa7a9}, + {0xa7a9, 0xa7a8}, + {0xa7aa, 0x266}, + {0xa7ab, 0x25c}, + {0xa7ac, 0x261}, + {0xa7ad, 0x26c}, + {0xa7b0, 0x29e}, + {0xa7b1, 0x287}, + {0xff21, 0xff41}, + {0xff22, 0xff42}, + {0xff23, 0xff43}, + {0xff24, 0xff44}, + {0xff25, 0xff45}, + {0xff26, 0xff46}, + {0xff27, 0xff47}, + {0xff28, 0xff48}, + {0xff29, 0xff49}, + {0xff2a, 0xff4a}, + {0xff2b, 0xff4b}, + {0xff2c, 0xff4c}, + {0xff2d, 0xff4d}, + {0xff2e, 0xff4e}, + {0xff2f, 0xff4f}, + {0xff30, 0xff50}, + {0xff31, 0xff51}, + {0xff32, 0xff52}, + {0xff33, 0xff53}, + {0xff34, 0xff54}, + {0xff35, 0xff55}, + {0xff36, 0xff56}, + {0xff37, 0xff57}, + {0xff38, 0xff58}, + {0xff39, 0xff59}, + {0xff3a, 0xff5a}, + {0xff41, 0xff21}, + {0xff42, 0xff22}, + {0xff43, 0xff23}, + {0xff44, 0xff24}, + {0xff45, 0xff25}, + {0xff46, 0xff26}, + {0xff47, 0xff27}, + {0xff48, 0xff28}, + {0xff49, 0xff29}, + {0xff4a, 0xff2a}, + {0xff4b, 0xff2b}, + {0xff4c, 0xff2c}, + {0xff4d, 0xff2d}, + {0xff4e, 0xff2e}, + {0xff4f, 0xff2f}, + {0xff50, 0xff30}, + {0xff51, 0xff31}, + {0xff52, 0xff32}, + {0xff53, 0xff33}, + {0xff54, 0xff34}, + {0xff55, 0xff35}, + {0xff56, 0xff36}, + {0xff57, 0xff37}, + {0xff58, 0xff38}, + {0xff59, 0xff39}, + {0xff5a, 0xff3a}, + {0x10400, 0x10428}, + {0x10401, 0x10429}, + {0x10402, 0x1042a}, + {0x10403, 0x1042b}, + {0x10404, 0x1042c}, + {0x10405, 0x1042d}, + {0x10406, 0x1042e}, + {0x10407, 0x1042f}, + {0x10408, 0x10430}, + {0x10409, 0x10431}, + {0x1040a, 0x10432}, + {0x1040b, 0x10433}, + {0x1040c, 0x10434}, + {0x1040d, 0x10435}, + {0x1040e, 0x10436}, + {0x1040f, 0x10437}, + {0x10410, 0x10438}, + {0x10411, 0x10439}, + {0x10412, 0x1043a}, + {0x10413, 0x1043b}, + {0x10414, 0x1043c}, + {0x10415, 0x1043d}, + {0x10416, 0x1043e}, + {0x10417, 0x1043f}, + {0x10418, 0x10440}, + {0x10419, 0x10441}, + {0x1041a, 0x10442}, + {0x1041b, 0x10443}, + {0x1041c, 0x10444}, + {0x1041d, 0x10445}, + {0x1041e, 0x10446}, + {0x1041f, 0x10447}, + {0x10420, 0x10448}, + {0x10421, 0x10449}, + {0x10422, 0x1044a}, + {0x10423, 0x1044b}, + {0x10424, 0x1044c}, + {0x10425, 0x1044d}, + {0x10426, 0x1044e}, + {0x10427, 0x1044f}, + {0x10428, 0x10400}, + {0x10429, 0x10401}, + {0x1042a, 0x10402}, + {0x1042b, 0x10403}, + {0x1042c, 0x10404}, + {0x1042d, 0x10405}, + {0x1042e, 0x10406}, + {0x1042f, 0x10407}, + {0x10430, 0x10408}, + {0x10431, 0x10409}, + {0x10432, 0x1040a}, + {0x10433, 0x1040b}, + {0x10434, 0x1040c}, + {0x10435, 0x1040d}, + {0x10436, 0x1040e}, + {0x10437, 0x1040f}, + {0x10438, 0x10410}, + {0x10439, 0x10411}, + {0x1043a, 0x10412}, + {0x1043b, 0x10413}, + {0x1043c, 0x10414}, + {0x1043d, 0x10415}, + {0x1043e, 0x10416}, + {0x1043f, 0x10417}, + {0x10440, 0x10418}, + {0x10441, 0x10419}, + {0x10442, 0x1041a}, + {0x10443, 0x1041b}, + {0x10444, 0x1041c}, + {0x10445, 0x1041d}, + {0x10446, 0x1041e}, + {0x10447, 0x1041f}, + {0x10448, 0x10420}, + {0x10449, 0x10421}, + {0x1044a, 0x10422}, + {0x1044b, 0x10423}, + {0x1044c, 0x10424}, + {0x1044d, 0x10425}, + {0x1044e, 0x10426}, + {0x1044f, 0x10427}, + {0x118a0, 0x118c0}, + {0x118a1, 0x118c1}, + {0x118a2, 0x118c2}, + {0x118a3, 0x118c3}, + {0x118a4, 0x118c4}, + {0x118a5, 0x118c5}, + {0x118a6, 0x118c6}, + {0x118a7, 0x118c7}, + {0x118a8, 0x118c8}, + {0x118a9, 0x118c9}, + {0x118aa, 0x118ca}, + {0x118ab, 0x118cb}, + {0x118ac, 0x118cc}, + {0x118ad, 0x118cd}, + {0x118ae, 0x118ce}, + {0x118af, 0x118cf}, + {0x118b0, 0x118d0}, + {0x118b1, 0x118d1}, + {0x118b2, 0x118d2}, + {0x118b3, 0x118d3}, + {0x118b4, 0x118d4}, + {0x118b5, 0x118d5}, + {0x118b6, 0x118d6}, + {0x118b7, 0x118d7}, + {0x118b8, 0x118d8}, + {0x118b9, 0x118d9}, + {0x118ba, 0x118da}, + {0x118bb, 0x118db}, + {0x118bc, 0x118dc}, + {0x118bd, 0x118dd}, + {0x118be, 0x118de}, + {0x118bf, 0x118df}, + {0x118c0, 0x118a0}, + {0x118c1, 0x118a1}, + {0x118c2, 0x118a2}, + {0x118c3, 0x118a3}, + {0x118c4, 0x118a4}, + {0x118c5, 0x118a5}, + {0x118c6, 0x118a6}, + {0x118c7, 0x118a7}, + {0x118c8, 0x118a8}, + {0x118c9, 0x118a9}, + {0x118ca, 0x118aa}, + {0x118cb, 0x118ab}, + {0x118cc, 0x118ac}, + {0x118cd, 0x118ad}, + {0x118ce, 0x118ae}, + {0x118cf, 0x118af}, + {0x118d0, 0x118b0}, + {0x118d1, 0x118b1}, + {0x118d2, 0x118b2}, + {0x118d3, 0x118b3}, + {0x118d4, 0x118b4}, + {0x118d5, 0x118b5}, + {0x118d6, 0x118b6}, + {0x118d7, 0x118b7}, + {0x118d8, 0x118b8}, + {0x118d9, 0x118b9}, + {0x118da, 0x118ba}, + {0x118db, 0x118bb}, + {0x118dc, 0x118bc}, + {0x118dd, 0x118bd}, + {0x118de, 0x118be}, + {0x118df, 0x118bf}, +}; + +#endif // UCP_TABLE_DEFINE_FN + +} // namespace ue2 + +#endif + diff --git a/contrib/libs/hyperscan/src/parser/unsupported.cpp b/contrib/libs/hyperscan/src/parser/unsupported.cpp index ccb555d7ca..c4b18b6a30 100644 --- a/contrib/libs/hyperscan/src/parser/unsupported.cpp +++ b/contrib/libs/hyperscan/src/parser/unsupported.cpp @@ -1,88 +1,88 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Checks component trees for unsupported components. - */ -#include "ConstComponentVisitor.h" -#include "ComponentEUS.h" -#include "ComponentRepeat.h" -#include "ComponentWordBoundary.h" -#include "parse_error.h" -#include "unsupported.h" - -#include <sstream> - -namespace ue2 { - -/** \brief Visitor class that throws a ParseError exception when it encounters - * an unsupported component. */ -class UnsupportedVisitor : public DefaultConstComponentVisitor { -public: - ~UnsupportedVisitor() override; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Checks component trees for unsupported components. + */ +#include "ConstComponentVisitor.h" +#include "ComponentEUS.h" +#include "ComponentRepeat.h" +#include "ComponentWordBoundary.h" +#include "parse_error.h" +#include "unsupported.h" + +#include <sstream> + +namespace ue2 { + +/** \brief Visitor class that throws a ParseError exception when it encounters + * an unsupported component. */ +class UnsupportedVisitor : public DefaultConstComponentVisitor { +public: + ~UnsupportedVisitor() override; using DefaultConstComponentVisitor::pre; - void pre(const ComponentAssertion &) override { - throw ParseError("Zero-width assertions are not supported."); - } - void pre(const ComponentAtomicGroup &) override { - throw ParseError("Atomic groups are unsupported."); - } - void pre(const ComponentBackReference &) override { - throw ParseError("Back-references are unsupported."); - } - void pre(const ComponentCondReference &) override { - throw ParseError("Conditional references are not supported."); - } - void pre(const ComponentEUS &c) override { - std::ostringstream str; - str << "\\X unsupported at index " << c.loc << "."; - throw ParseError(str.str()); - } - void pre(const ComponentRepeat &c) override { - if (c.type == ComponentRepeat::REPEAT_POSSESSIVE) { - throw ParseError("Possessive quantifiers are not supported."); - } - } - void pre(const ComponentWordBoundary &c) override { - if (c.ucp && !c.prefilter) { - std::ostringstream str; - str << (!c.negated ? "\\b" : "\\B") - << " unsupported in UCP mode at index " << c.loc << "."; - throw ParseError(str.str()); - } - } -}; - -UnsupportedVisitor::~UnsupportedVisitor() {} - -void checkUnsupported(const Component &root) { - UnsupportedVisitor vis; - root.accept(vis); -} - -} // namespace ue2 + void pre(const ComponentAssertion &) override { + throw ParseError("Zero-width assertions are not supported."); + } + void pre(const ComponentAtomicGroup &) override { + throw ParseError("Atomic groups are unsupported."); + } + void pre(const ComponentBackReference &) override { + throw ParseError("Back-references are unsupported."); + } + void pre(const ComponentCondReference &) override { + throw ParseError("Conditional references are not supported."); + } + void pre(const ComponentEUS &c) override { + std::ostringstream str; + str << "\\X unsupported at index " << c.loc << "."; + throw ParseError(str.str()); + } + void pre(const ComponentRepeat &c) override { + if (c.type == ComponentRepeat::REPEAT_POSSESSIVE) { + throw ParseError("Possessive quantifiers are not supported."); + } + } + void pre(const ComponentWordBoundary &c) override { + if (c.ucp && !c.prefilter) { + std::ostringstream str; + str << (!c.negated ? "\\b" : "\\B") + << " unsupported in UCP mode at index " << c.loc << "."; + throw ParseError(str.str()); + } + } +}; + +UnsupportedVisitor::~UnsupportedVisitor() {} + +void checkUnsupported(const Component &root) { + UnsupportedVisitor vis; + root.accept(vis); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/unsupported.h b/contrib/libs/hyperscan/src/parser/unsupported.h index 41a75a1b13..f3905cdf06 100644 --- a/contrib/libs/hyperscan/src/parser/unsupported.h +++ b/contrib/libs/hyperscan/src/parser/unsupported.h @@ -1,47 +1,47 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Checks component trees for unsupported components. - */ -#ifndef PARSER_UNSUPPORTED_H_ -#define PARSER_UNSUPPORTED_H_ - -#include "parse_error.h" - -namespace ue2 { - -class Component; - -/** \brief Throws a ParseError if this component tree contains an unsupported - * Component. */ -void checkUnsupported(const Component &root); - -} // namespace - -#endif // PARSER_UNSUPPORTED_H_ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Checks component trees for unsupported components. + */ +#ifndef PARSER_UNSUPPORTED_H_ +#define PARSER_UNSUPPORTED_H_ + +#include "parse_error.h" + +namespace ue2 { + +class Component; + +/** \brief Throws a ParseError if this component tree contains an unsupported + * Component. */ +void checkUnsupported(const Component &root); + +} // namespace + +#endif // PARSER_UNSUPPORTED_H_ diff --git a/contrib/libs/hyperscan/src/parser/utf8_validate.cpp b/contrib/libs/hyperscan/src/parser/utf8_validate.cpp index 77807d5d97..50aa06d8e7 100644 --- a/contrib/libs/hyperscan/src/parser/utf8_validate.cpp +++ b/contrib/libs/hyperscan/src/parser/utf8_validate.cpp @@ -1,162 +1,162 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "utf8_validate.h" - -#include "ue2common.h" -#include "util/unicode_def.h" - -#include <cstring> - -namespace ue2 { - -static -bool hasValidContBytes(const u8 *s, size_t num) { - /* continuer bytes must all be of the form 10xx xxxx */ - for (size_t i = 0; i < num; i++) { - if ((s[i] & 0xc0) != UTF_CONT_BYTE_HEADER) { - return false; - } - } - return true; -} - -static -bool isAllowedCodepoint(u32 val) { - if (val >= 0xd800 && val <= 0xdfff) { - return false; // High and low surrogate halves - } - if (val > 0x10ffff) { - return false; // As per limit in RFC 3629 - } - - return true; -} - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "utf8_validate.h" + +#include "ue2common.h" +#include "util/unicode_def.h" + +#include <cstring> + +namespace ue2 { + +static +bool hasValidContBytes(const u8 *s, size_t num) { + /* continuer bytes must all be of the form 10xx xxxx */ + for (size_t i = 0; i < num; i++) { + if ((s[i] & 0xc0) != UTF_CONT_BYTE_HEADER) { + return false; + } + } + return true; +} + +static +bool isAllowedCodepoint(u32 val) { + if (val >= 0xd800 && val <= 0xdfff) { + return false; // High and low surrogate halves + } + if (val > 0x10ffff) { + return false; // As per limit in RFC 3629 + } + + return true; +} + bool isValidUtf8(const char *expression, const size_t len) { - if (!expression) { - return true; - } - - const u8 *s = (const u8 *)expression; - u32 val; - - size_t i = 0; - while (i < len) { - DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]); - // One octet. - if (s[i] < 0x7f) { - DEBUG_PRINTF("one octet\n"); - i++; - continue; - } - - // Two octets. - if ((s[i] & 0xe0) == UTF_TWO_BYTE_HEADER) { - DEBUG_PRINTF("two octets\n"); - if (i + 2 > len) { - break; - } - if (!hasValidContBytes(&s[i] + 1, 1)) { - break; - } - val = ((s[i] & 0x1f) << 6) | (s[i + 1] & UTF_CONT_BYTE_VALUE_MASK); - DEBUG_PRINTF("val=0x%x\n", val); - if (val < 1U << 7) { - DEBUG_PRINTF("overlong encoding\n"); - break; - } - if (!isAllowedCodepoint(val)) { - DEBUG_PRINTF("codepoint not allowed\n"); - break; - } - i += 2; - continue; - } - - // Three octets. - if ((s[i] & 0xf0) == UTF_THREE_BYTE_HEADER) { - DEBUG_PRINTF("three octets\n"); - if (i + 3 > len) { - break; - } - if (!hasValidContBytes(&s[i] + 1, 2)) { - break; - } - val = ((s[i] & 0xf) << 12) | - ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 6) | - (s[i + 2] & UTF_CONT_BYTE_VALUE_MASK); - if (val < 1U << 11) { - DEBUG_PRINTF("overlong encoding\n"); - break; - } - if (!isAllowedCodepoint(val)) { - DEBUG_PRINTF("codepoint not allowed\n"); - break; - } - i += 3; - continue; - } - - // Four octets. - if ((s[i] & 0xf8) == UTF_FOUR_BYTE_HEADER) { - DEBUG_PRINTF("four octets\n"); - if (i + 4 > len) { - break; - } - if (!hasValidContBytes(&s[i] + 1, 3)) { - break; - } - val = ((s[i] & 0xf) << 18) | - ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 12) | - ((s[i + 2] & UTF_CONT_BYTE_VALUE_MASK) << 6) | - (s[i + 3] & UTF_CONT_BYTE_VALUE_MASK); - if (val < 1U << 16) { - DEBUG_PRINTF("overlong encoding\n"); - break; - } - if (!isAllowedCodepoint(val)) { - DEBUG_PRINTF("codepoint not allowed\n"); - break; - } - i += 4; - continue; - } - - // Something else? - DEBUG_PRINTF("bad byte 0x%02x\n", s[i]); - break; - } - - DEBUG_PRINTF("i=%zu, len=%zu\n", i, len); - return i == len; -} - -} // namespace ue2 + if (!expression) { + return true; + } + + const u8 *s = (const u8 *)expression; + u32 val; + + size_t i = 0; + while (i < len) { + DEBUG_PRINTF("byte %zu: 0x%02x\n", i, s[i]); + // One octet. + if (s[i] < 0x7f) { + DEBUG_PRINTF("one octet\n"); + i++; + continue; + } + + // Two octets. + if ((s[i] & 0xe0) == UTF_TWO_BYTE_HEADER) { + DEBUG_PRINTF("two octets\n"); + if (i + 2 > len) { + break; + } + if (!hasValidContBytes(&s[i] + 1, 1)) { + break; + } + val = ((s[i] & 0x1f) << 6) | (s[i + 1] & UTF_CONT_BYTE_VALUE_MASK); + DEBUG_PRINTF("val=0x%x\n", val); + if (val < 1U << 7) { + DEBUG_PRINTF("overlong encoding\n"); + break; + } + if (!isAllowedCodepoint(val)) { + DEBUG_PRINTF("codepoint not allowed\n"); + break; + } + i += 2; + continue; + } + + // Three octets. + if ((s[i] & 0xf0) == UTF_THREE_BYTE_HEADER) { + DEBUG_PRINTF("three octets\n"); + if (i + 3 > len) { + break; + } + if (!hasValidContBytes(&s[i] + 1, 2)) { + break; + } + val = ((s[i] & 0xf) << 12) | + ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 6) | + (s[i + 2] & UTF_CONT_BYTE_VALUE_MASK); + if (val < 1U << 11) { + DEBUG_PRINTF("overlong encoding\n"); + break; + } + if (!isAllowedCodepoint(val)) { + DEBUG_PRINTF("codepoint not allowed\n"); + break; + } + i += 3; + continue; + } + + // Four octets. + if ((s[i] & 0xf8) == UTF_FOUR_BYTE_HEADER) { + DEBUG_PRINTF("four octets\n"); + if (i + 4 > len) { + break; + } + if (!hasValidContBytes(&s[i] + 1, 3)) { + break; + } + val = ((s[i] & 0xf) << 18) | + ((s[i + 1] & UTF_CONT_BYTE_VALUE_MASK) << 12) | + ((s[i + 2] & UTF_CONT_BYTE_VALUE_MASK) << 6) | + (s[i + 3] & UTF_CONT_BYTE_VALUE_MASK); + if (val < 1U << 16) { + DEBUG_PRINTF("overlong encoding\n"); + break; + } + if (!isAllowedCodepoint(val)) { + DEBUG_PRINTF("codepoint not allowed\n"); + break; + } + i += 4; + continue; + } + + // Something else? + DEBUG_PRINTF("bad byte 0x%02x\n", s[i]); + break; + } + + DEBUG_PRINTF("i=%zu, len=%zu\n", i, len); + return i == len; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/utf8_validate.h b/contrib/libs/hyperscan/src/parser/utf8_validate.h index 938454c4a9..6389a0859f 100644 --- a/contrib/libs/hyperscan/src/parser/utf8_validate.h +++ b/contrib/libs/hyperscan/src/parser/utf8_validate.h @@ -1,41 +1,41 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef PARSER_UTF8_VALIDATE_H -#define PARSER_UTF8_VALIDATE_H - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PARSER_UTF8_VALIDATE_H +#define PARSER_UTF8_VALIDATE_H + #include <cstddef> // size_t -namespace ue2 { - -/** \brief Validate that the given expression is well-formed UTF-8. */ +namespace ue2 { + +/** \brief Validate that the given expression is well-formed UTF-8. */ bool isValidUtf8(const char *expression, const size_t len); - -} // namespace ue2 - -#endif // PARSER_UTF8_VALIDATE_H + +} // namespace ue2 + +#endif // PARSER_UTF8_VALIDATE_H |