diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/parser | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/parser')
17 files changed, 441 insertions, 441 deletions
diff --git a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h index c0ee9ac3ac..91c560ad89 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentCondReference.h +++ b/contrib/libs/hyperscan/src/parser/ComponentCondReference.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,9 +46,9 @@ class ComponentCondReference : public ComponentSequence { friend class ReferenceVisitor; friend class PrintVisitor; public: - explicit ComponentCondReference(unsigned ref); - explicit ComponentCondReference(const std::string &name); - explicit ComponentCondReference(std::unique_ptr<Component> c); + explicit ComponentCondReference(unsigned ref); + explicit ComponentCondReference(const std::string &name); + explicit ComponentCondReference(std::unique_ptr<Component> c); ~ComponentCondReference() override; ComponentCondReference *clone() const override; diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp index 09f59d05ec..47bc772529 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp +++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.cpp @@ -87,7 +87,7 @@ ComponentRepeat::ComponentRepeat(const ComponentRepeat &other) type(other.type), sub_comp(unique_ptr<Component>(other.sub_comp->clone())), m_min(other.m_min), m_max(other.m_max), m_firsts(other.m_firsts), m_lasts(other.m_lasts), - posFirst(other.posFirst), posLast(other.posLast) {} + posFirst(other.posFirst), posLast(other.posLast) {} bool ComponentRepeat::empty() const { return m_min == 0 || sub_comp->empty(); @@ -174,24 +174,24 @@ void ComponentRepeat::notePositions(GlushkovBuildState &bs) { } recordPosBounds(posFirst, bs.getBuilder().numVertices()); - - // Each optional repeat has an epsilon at the end of its firsts list. - for (u32 i = m_min; i < m_firsts.size(); i++) { - m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); - } - + + // Each optional repeat has an epsilon at the end of its firsts list. + for (u32 i = m_min; i < m_firsts.size(); i++) { + m_firsts[i].push_back(GlushkovBuildState::POS_EPSILON); + } + } vector<PositionInfo> ComponentRepeat::first() const { - if (!m_max) { - return {}; - } - - assert(!m_firsts.empty()); // notePositions should already have run - const vector<PositionInfo> &firsts = m_firsts.front(); - DEBUG_PRINTF("firsts = %s\n", - dumpPositions(begin(firsts), end(firsts)).c_str()); - return firsts; + if (!m_max) { + return {}; + } + + assert(!m_firsts.empty()); // notePositions should already have run + const vector<PositionInfo> &firsts = m_firsts.front(); + DEBUG_PRINTF("firsts = %s\n", + dumpPositions(begin(firsts), end(firsts)).c_str()); + return firsts; } void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, @@ -218,7 +218,7 @@ void ComponentRepeat::buildFollowSet(GlushkovBuildState &bs, } } - wireRepeats(bs); + wireRepeats(bs); DEBUG_PRINTF("leave\n"); } @@ -234,7 +234,7 @@ void ComponentRepeat::optimise(bool connected_to_sds) { } bool ComponentRepeat::vacuous_everywhere() const { - return !m_min || sub_comp->vacuous_everywhere(); + return !m_min || sub_comp->vacuous_everywhere(); } bool ComponentRepeat::checkEmbeddedStartAnchor(bool at_start) const { @@ -288,24 +288,24 @@ vector<PositionInfo> ComponentRepeat::last() const { assert(!m_firsts.empty()); // notePositions should already have run assert(!m_lasts.empty()); - const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0]; - lasts.insert(lasts.end(), l.begin(), l.end()); - + const auto &l = m_min ? m_lasts[m_min - 1] : m_lasts[0]; + lasts.insert(lasts.end(), l.begin(), l.end()); + if (!m_min || m_min != m_lasts.size()) { lasts.insert(lasts.end(), m_lasts.back().begin(), m_lasts.back().end()); } - - DEBUG_PRINTF("lasts = %s\n", - dumpPositions(lasts.begin(), lasts.end()).c_str()); + + DEBUG_PRINTF("lasts = %s\n", + dumpPositions(lasts.begin(), lasts.end()).c_str()); return lasts; } -void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { +void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { /* note: m_lasts[0] already valid */ u32 copies = m_firsts.size(); const bool isEmpty = sub_comp->empty(); - const vector<PositionInfo> &optLasts = - m_min ? m_lasts[m_min - 1] : m_lasts[0]; + const vector<PositionInfo> &optLasts = + m_min ? m_lasts[m_min - 1] : m_lasts[0]; if (!copies) { goto inf_check; @@ -324,7 +324,7 @@ void ComponentRepeat::wireRepeats(GlushkovBuildState &bs) { DEBUG_PRINTF("wiring up %d optional repeats\n", copies - m_min); for (u32 rep = MAX(m_min, 1); rep < copies; rep++) { vector<PositionInfo> lasts = m_lasts[rep - 1]; - if (rep != m_min) { + if (rep != m_min) { lasts.insert(lasts.end(), optLasts.begin(), optLasts.end()); sort(lasts.begin(), lasts.end()); lasts.erase(unique(lasts.begin(), lasts.end()), lasts.end()); diff --git a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h index 8905bfcf5e..8565f1bb26 100644 --- a/contrib/libs/hyperscan/src/parser/ComponentRepeat.h +++ b/contrib/libs/hyperscan/src/parser/ComponentRepeat.h @@ -42,31 +42,31 @@ namespace ue2 { -/** - * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', +/** + * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', * etc). * - * ASCII Art Time: + * ASCII Art Time: * * Our standard representation of standard repeats. Other constructions (fan-in * vs fan-out) would also be possible and equivalent for our purposes. * * {n,m} * - * S->M->M->M->O->O->O->T - * | ^ ^ ^ - * | | | | - * \-----------/ + * S->M->M->M->O->O->O->T + * | ^ ^ ^ + * | | | | + * \-----------/ * * {0,m} * - * /-----------\ - * | | - * | V - * S->O->O->O->T - * | ^ ^ ^ - * | | | | - * \--------/ + * /-----------\ + * | | + * | V + * S->O->O->O->T + * | ^ ^ ^ + * | | | | + * \--------/ * */ class ComponentRepeat : public Component { @@ -121,7 +121,7 @@ public: protected: void postSubNotePositionHook(); - void wireRepeats(GlushkovBuildState &bs); + void wireRepeats(GlushkovBuildState &bs); std::unique_ptr<Component> sub_comp; u32 m_min; diff --git a/contrib/libs/hyperscan/src/parser/Parser.h b/contrib/libs/hyperscan/src/parser/Parser.h index a034a18fc1..69844eed7e 100644 --- a/contrib/libs/hyperscan/src/parser/Parser.h +++ b/contrib/libs/hyperscan/src/parser/Parser.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,7 +69,7 @@ struct ParseMode { * * This call will throw a ParseError on failure. */ -std::unique_ptr<Component> parse(const char *ptr, ParseMode &mode); +std::unique_ptr<Component> parse(const char *ptr, ParseMode &mode); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/Parser.rl6 b/contrib/libs/hyperscan/src/parser/Parser.rl6 index 8643aebfc6..0b529f995c 100644 --- a/contrib/libs/hyperscan/src/parser/Parser.rl6 +++ b/contrib/libs/hyperscan/src/parser/Parser.rl6 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ /* Parser.cpp is a built source, may not be in same dir as parser files */ #include "parser/check_refs.h" -#include "parser/control_verbs.h" +#include "parser/control_verbs.h" #include "parser/ComponentAlternation.h" #include "parser/ComponentAssertion.h" #include "parser/ComponentAtomicGroup.h" @@ -53,7 +53,7 @@ #include "parser/Parser.h" #include "ue2common.h" #include "util/compare.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/make_unique.h" #include "util/unicode_def.h" #include "util/verify_types.h" @@ -116,7 +116,7 @@ unsigned parseAsDecimal(unsigned oct) { static constexpr u32 MAX_NUMBER = INT_MAX; static -void pushDec(u32 *acc, char raw_digit) { +void pushDec(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '9'); u32 digit_val = raw_digit - '0'; @@ -130,7 +130,7 @@ void pushDec(u32 *acc, char raw_digit) { } static -void pushOct(u32 *acc, char raw_digit) { +void pushOct(u32 *acc, char raw_digit) { assert(raw_digit >= '0' && raw_digit <= '7'); u32 digit_val = raw_digit - '0'; @@ -169,7 +169,7 @@ ComponentSequence *enterSequence(ComponentSequence *parent, } static -void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { +void addLiteral(ComponentSequence *currentSeq, char c, const ParseMode &mode) { if (mode.utf8 && mode.caseless) { /* leverage ComponentClass to generate the vertices */ auto cc = getComponentClass(mode); @@ -196,7 +196,7 @@ void addEscaped(ComponentSequence *currentSeq, unichar accum, if (accum > 255) { throw LocatedParseError(err_msg); } - addLiteral(currentSeq, (char)accum, mode); + addLiteral(currentSeq, (char)accum, mode); } } @@ -216,7 +216,7 @@ void addEscapedHex(ComponentSequence *currentSeq, unichar accum, #define SLASH_C_ERROR "\\c must be followed by an ASCII character" static -u8 decodeCtrl(char raw) { +u8 decodeCtrl(char raw) { if (raw & 0x80) { throw LocatedParseError(SLASH_C_ERROR); } @@ -224,8 +224,8 @@ u8 decodeCtrl(char raw) { } static -unichar readUtf8CodePoint2c(const char *s) { - auto *ts = (const u8 *)s; +unichar readUtf8CodePoint2c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xc0 && ts[0] < 0xe0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); unichar val = ts[0] & 0x1f; @@ -237,8 +237,8 @@ unichar readUtf8CodePoint2c(const char *s) { } static -unichar readUtf8CodePoint3c(const char *s) { - auto *ts = (const u8 *)s; +unichar readUtf8CodePoint3c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xe0 && ts[0] < 0xf0); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -253,8 +253,8 @@ unichar readUtf8CodePoint3c(const char *s) { } static -unichar readUtf8CodePoint4c(const char *s) { - auto *ts = (const u8 *)s; +unichar readUtf8CodePoint4c(const char *s) { + auto *ts = (const u8 *)s; assert(ts[0] >= 0xf0 && ts[0] < 0xf8); assert(ts[1] >= 0x80 && ts[1] < 0xc0); assert(ts[2] >= 0x80 && ts[2] < 0xc0); @@ -276,8 +276,8 @@ unichar readUtf8CodePoint4c(const char *s) { action throwUnsupportedEscape { ostringstream str; - str << "'\\" << *(ts + 1) << "' at index " << ts - ptr - << " not supported in a character class."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported in a character class."; throw ParseError(str.str()); } action unsupportedProperty { @@ -549,25 +549,25 @@ unichar readUtf8CodePoint4c(const char *s) { ############################################################# readVerb := |* 'UTF8)' => { - throw LocatedParseError("(*UTF8) must be at start of " - "expression, encountered"); - }; - 'UTF)' => { - throw LocatedParseError("(*UTF) must be at start of " - "expression, encountered"); + throw LocatedParseError("(*UTF8) must be at start of " + "expression, encountered"); }; + 'UTF)' => { + throw LocatedParseError("(*UTF) must be at start of " + "expression, encountered"); + }; 'UCP)' => { - throw LocatedParseError("(*UCP) must be at start of " - "expression, encountered"); + throw LocatedParseError("(*UCP) must be at start of " + "expression, encountered"); }; - # Use the control verb mini-parser to report an error for this - # unsupported/unknown verb. - [^)]+ ')' => { - ParseMode temp_mode; - assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. - read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); - assert(0); // Should have thrown a parse error. - throw LocatedParseError("Unknown control verb"); + # Use the control verb mini-parser to report an error for this + # unsupported/unknown verb. + [^)]+ ')' => { + ParseMode temp_mode; + assert(ts - 2 >= ptr); // parser needs the '(*' at the start too. + read_control_verbs(ts - 2, te, (ts - 2 - ptr), temp_mode); + assert(0); // Should have thrown a parse error. + throw LocatedParseError("Unknown control verb"); }; any => { throw LocatedParseError("Unknown control verb"); @@ -976,13 +976,13 @@ unichar readUtf8CodePoint4c(const char *s) { }; '\\o{' [0-7]+ '}' => { - string oct(ts + 3, te - ts - 4); - unsigned long val; - try { - val = stoul(oct, nullptr, 8); - } catch (const std::out_of_range &) { - val = MAX_UNICODE + 1; - } + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1006,13 +1006,13 @@ unichar readUtf8CodePoint4c(const char *s) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex(ts + 3, te - ts - 4); - unsigned long val; - try { - val = stoul(hex, nullptr, 16); - } catch (const std::out_of_range &) { - val = MAX_UNICODE + 1; - } + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1101,7 +1101,7 @@ unichar readUtf8CodePoint4c(const char *s) { # Literal character (any - ']') => { - currentCls->add((u8)*ts); + currentCls->add((u8)*ts); }; ']' => { @@ -1155,40 +1155,40 @@ unichar readUtf8CodePoint4c(const char *s) { '\\E' => { fgoto main; }; - - #unicode chars - utf8_2c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint2c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - utf8_3c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint3c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - utf8_4c when is_utf8 => { - assert(mode.utf8); - /* leverage ComponentClass to generate the vertices */ - auto cc = getComponentClass(mode); - cc->add(readUtf8CodePoint4c(ts)); - cc->finalize(); - currentSeq->addComponent(move(cc)); - }; - - hi_byte when is_utf8 => { - assert(mode.utf8); - throwInvalidUtf8(); - }; - + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint2c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint3c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + /* leverage ComponentClass to generate the vertices */ + auto cc = getComponentClass(mode); + cc->add(readUtf8CodePoint4c(ts)); + cc->finalize(); + currentSeq->addComponent(move(cc)); + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + # Literal character any => { addLiteral(currentSeq, *ts, mode); @@ -1203,31 +1203,31 @@ unichar readUtf8CodePoint4c(const char *s) { '\\E' => { fret; }; - - #unicode chars - utf8_2c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint2c(ts)); - inCharClassEarly = false; - }; - - utf8_3c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint3c(ts)); - inCharClassEarly = false; - }; - - utf8_4c when is_utf8 => { - assert(mode.utf8); - currentCls->add(readUtf8CodePoint4c(ts)); - inCharClassEarly = false; - }; - - hi_byte when is_utf8 => { - assert(mode.utf8); - throwInvalidUtf8(); - }; - + + #unicode chars + utf8_2c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint2c(ts)); + inCharClassEarly = false; + }; + + utf8_3c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint3c(ts)); + inCharClassEarly = false; + }; + + utf8_4c when is_utf8 => { + assert(mode.utf8); + currentCls->add(readUtf8CodePoint4c(ts)); + inCharClassEarly = false; + }; + + hi_byte when is_utf8 => { + assert(mode.utf8); + throwInvalidUtf8(); + }; + # Literal character any => { currentCls->add(*ts); @@ -1294,8 +1294,8 @@ unichar readUtf8CodePoint4c(const char *s) { '\\Q' => { fgoto readQuotedLiteral; }; - # An \E that is not preceded by a \Q is ignored - '\\E' => { /* noop */ }; + # An \E that is not preceded by a \Q is ignored + '\\E' => { /* noop */ }; # Match any character '\.' => { currentSeq->addComponent(generateComponent(CLASS_ANY, false, mode)); @@ -1514,12 +1514,12 @@ unichar readUtf8CodePoint4c(const char *s) { // Otherwise, we interpret the first three digits as an // octal escape, and the remaining characters stand for // themselves as literals. - const char *s = ts; + const char *s = ts; unsigned int accum = 0; unsigned int oct_digits = 0; - assert(*s == '\\'); // token starts at backslash - for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) { - u8 digit = *s - '0'; + assert(*s == '\\'); // token starts at backslash + for (++s; s < te && oct_digits < 3; ++oct_digits, ++s) { + u8 digit = *s - '0'; if (digit < 8) { accum = digit + accum * 8; } else { @@ -1532,8 +1532,8 @@ unichar readUtf8CodePoint4c(const char *s) { } // And then the rest of the digits, if any, are literal. - for (; s < te; ++s) { - addLiteral(currentSeq, *s, mode); + for (; s < te; ++s) { + addLiteral(currentSeq, *s, mode); } } }; @@ -1559,13 +1559,13 @@ unichar readUtf8CodePoint4c(const char *s) { throw LocatedParseError("Invalid reference after \\g"); }; '\\o{' [0-7]+ '}' => { - string oct(ts + 3, te - ts - 4); - unsigned long val; - try { - val = stoul(oct, nullptr, 8); - } catch (const std::out_of_range &) { - val = MAX_UNICODE + 1; - } + string oct(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(oct, nullptr, 8); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if ((!mode.utf8 && val > 255) || val > MAX_UNICODE) { throw LocatedParseError("Value in \\o{...} sequence is too large"); } @@ -1581,13 +1581,13 @@ unichar readUtf8CodePoint4c(const char *s) { }; # Unicode Hex '\\x{' xdigit+ '}' => { - string hex(ts + 3, te - ts - 4); - unsigned long val; - try { - val = stoul(hex, nullptr, 16); - } catch (const std::out_of_range &) { - val = MAX_UNICODE + 1; - } + string hex(ts + 3, te - ts - 4); + unsigned long val; + try { + val = stoul(hex, nullptr, 16); + } catch (const std::out_of_range &) { + val = MAX_UNICODE + 1; + } if (val > MAX_UNICODE) { throw LocatedParseError("Value in \\x{...} sequence is too large"); } @@ -1610,8 +1610,8 @@ unichar readUtf8CodePoint4c(const char *s) { # A bunch of unsupported (for now) escapes escapedUnsupported => { ostringstream str; - str << "'\\" << *(ts + 1) << "' at index " << ts - ptr - << " not supported."; + str << "'\\" << *(ts + 1) << "' at index " << ts - ptr + << " not supported."; throw ParseError(str.str()); }; @@ -1912,22 +1912,22 @@ unichar readUtf8CodePoint4c(const char *s) { %% write data nofinal; /** \brief Main parser call, returns root Component or nullptr. */ -unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { - assert(ptr); - - const char *p = ptr; - const char *pe = ptr + strlen(ptr); - - // First, read the control verbs, set any global mode flags and move the - // ptr forward. - p = read_control_verbs(p, pe, 0, globalMode); - - const char *eof = pe; +unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { + assert(ptr); + + const char *p = ptr; + const char *pe = ptr + strlen(ptr); + + // First, read the control verbs, set any global mode flags and move the + // ptr forward. + p = read_control_verbs(p, pe, 0, globalMode); + + const char *eof = pe; int cs; UNUSED int act; int top; vector<int> stack; - const char *ts, *te; + const char *ts, *te; unichar accumulator = 0; unichar octAccumulator = 0; /* required as we are also accumulating for * back ref when looking for octals */ @@ -1950,7 +1950,7 @@ unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { unsigned groupIndex = 1; // Set storing group names that are currently in use. - flat_set<string> groupNames; + flat_set<string> groupNames; // Root sequence. unique_ptr<ComponentSequence> rootSeq = ue2::make_unique<ComponentSequence>(); @@ -1973,7 +1973,7 @@ unique_ptr<Component> parse(const char *ptr, ParseMode &globalMode) { bool inCharClassEarly = false; // Location at which the current character class began. - const char *currentClsBegin = p; + const char *currentClsBegin = p; // We throw exceptions on various parsing failures beyond this point: we // use a try/catch block here to clean up our allocated memory before we diff --git a/contrib/libs/hyperscan/src/parser/buildstate.cpp b/contrib/libs/hyperscan/src/parser/buildstate.cpp index 75cfbb7b2d..a5b0d2f2eb 100644 --- a/contrib/libs/hyperscan/src/parser/buildstate.cpp +++ b/contrib/libs/hyperscan/src/parser/buildstate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,10 +39,10 @@ #include "nfagraph/ng_builder.h" #include "util/charreach.h" #include "util/container.h" -#include "util/flat_containers.h" -#include "util/hash.h" +#include "util/flat_containers.h" +#include "util/hash.h" #include "util/make_unique.h" -#include "util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <iterator> @@ -451,7 +451,7 @@ unique_ptr<GlushkovBuildState> makeGlushkovBuildState(NFABuilder &b, * Scans through a list of positions and retains only the highest priority * version of a given (position, flags) entry. */ void cleanupPositions(vector<PositionInfo> &a) { - ue2_unordered_set<pair<Position, int>> seen; + ue2_unordered_set<pair<Position, int>> seen; vector<PositionInfo> out; out.reserve(a.size()); // output should be close to input in size. diff --git a/contrib/libs/hyperscan/src/parser/buildstate.h b/contrib/libs/hyperscan/src/parser/buildstate.h index 5ddaf9b238..71109262d1 100644 --- a/contrib/libs/hyperscan/src/parser/buildstate.h +++ b/contrib/libs/hyperscan/src/parser/buildstate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #include "ue2common.h" #include "position.h" -#include "util/noncopyable.h" +#include "util/noncopyable.h" #include <memory> #include <vector> @@ -49,7 +49,7 @@ class PositionInfo; * * Abstract base class; use \ref makeGlushkovBuildState to get one of these you * can use. */ -class GlushkovBuildState : noncopyable { +class GlushkovBuildState : noncopyable { public: /** \brief Represents an uninitialized state. */ static const Position POS_UNINITIALIZED; diff --git a/contrib/libs/hyperscan/src/parser/check_refs.cpp b/contrib/libs/hyperscan/src/parser/check_refs.cpp index 60b5b6ba77..fb32ec2974 100644 --- a/contrib/libs/hyperscan/src/parser/check_refs.cpp +++ b/contrib/libs/hyperscan/src/parser/check_refs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,7 @@ #include "ConstComponentVisitor.h" #include "parse_error.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <sstream> @@ -73,8 +73,8 @@ public: throw ParseError(str.str()); } - using DefaultConstComponentVisitor::pre; - + using DefaultConstComponentVisitor::pre; + void pre(const ComponentBackReference &c) override { if (c.ref_id) { if (c.ref_id >= num_ids) { @@ -114,7 +114,7 @@ public: ReferenceVisitor::~ReferenceVisitor() {} void checkReferences(const Component &root, unsigned int groupIndices, - const flat_set<std::string> &groupNames) { + const flat_set<std::string> &groupNames) { ReferenceVisitor vis(groupIndices, groupNames); root.accept(vis); } diff --git a/contrib/libs/hyperscan/src/parser/check_refs.h b/contrib/libs/hyperscan/src/parser/check_refs.h index 26912fb8e4..5e1678702a 100644 --- a/contrib/libs/hyperscan/src/parser/check_refs.h +++ b/contrib/libs/hyperscan/src/parser/check_refs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,17 +26,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Component tree analysis that checks that references (such as * back-refs, conditionals) have valid referents. */ -#ifndef PARSER_CHECK_REFS_H -#define PARSER_CHECK_REFS_H - -#include "util/flat_containers.h" +#ifndef PARSER_CHECK_REFS_H +#define PARSER_CHECK_REFS_H +#include "util/flat_containers.h" + #include <string> namespace ue2 { @@ -45,8 +45,8 @@ class Component; class ComponentSequence; void checkReferences(const Component &root, unsigned int groupIndices, - const flat_set<std::string> &groupNames); + const flat_set<std::string> &groupNames); } // namespace ue2 -#endif // PARSER_CHECK_REFS_H +#endif // PARSER_CHECK_REFS_H diff --git a/contrib/libs/hyperscan/src/parser/control_verbs.h b/contrib/libs/hyperscan/src/parser/control_verbs.h index 58934ec2ce..6093502386 100644 --- a/contrib/libs/hyperscan/src/parser/control_verbs.h +++ b/contrib/libs/hyperscan/src/parser/control_verbs.h @@ -1,48 +1,48 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Parser for control verbs that can occur at the beginning of a pattern. - */ - -#ifndef CONTROL_VERBS_H -#define CONTROL_VERBS_H - -#include "ue2common.h" - -namespace ue2 { - -struct ParseMode; - -const char *read_control_verbs(const char *ptr, const char *end, size_t start, - ParseMode &mode); - -} // namespace ue2 - -#endif // CONTROL_VERBS_H +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ + +#ifndef CONTROL_VERBS_H +#define CONTROL_VERBS_H + +#include "ue2common.h" + +namespace ue2 { + +struct ParseMode; + +const char *read_control_verbs(const char *ptr, const char *end, size_t start, + ParseMode &mode); + +} // namespace ue2 + +#endif // CONTROL_VERBS_H diff --git a/contrib/libs/hyperscan/src/parser/control_verbs.rl6 b/contrib/libs/hyperscan/src/parser/control_verbs.rl6 index 1d3e33a9aa..46a0bfd000 100644 --- a/contrib/libs/hyperscan/src/parser/control_verbs.rl6 +++ b/contrib/libs/hyperscan/src/parser/control_verbs.rl6 @@ -1,121 +1,121 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Parser for control verbs that can occur at the beginning of a pattern. - */ - -#include "parser/control_verbs.h" - -#include "parser/Parser.h" -#include "parser/parse_error.h" - -#include <cstring> -#include <sstream> - -using namespace std; - -namespace ue2 { - -const char *read_control_verbs(const char *ptr, const char *end, size_t start, - ParseMode &mode) { - const char *p = ptr; - const char *pe = end; - const char *eof = pe; - const char *ts, *te; - int cs; - UNUSED int act; - - %%{ - machine ControlVerbs; - - # Verbs that we recognise but do not support. - unhandledVerbs = '(*' ( - 'LIMIT_MATCH=' [0-9]+ | - 'LIMIT_RECURSION=' [0-9]+ | - 'NO_AUTO_POSSESS' | - 'NO_START_OPT' | - 'UTF16' | - 'UTF32' | - 'CR' | - 'LF' | - 'CRLF' | - 'ANYCRLF' | - 'ANY' | - 'BSR_ANYCRLF' | - 'BSR_UNICODE' - ) . ')'; - - main := |* - '(*UTF8)' | '(*UTF)' => { - mode.utf8 = true; - }; - - '(*UCP)' => { - mode.ucp = true; - }; - - unhandledVerbs => { - ostringstream str; - str << "Unsupported control verb " << string(ts, te - ts); - throw LocatedParseError(str.str()); - }; - - '(*' [^)]+ ')' => { - ostringstream str; - str << "Unknown control verb " << string(ts, te - ts); - throw LocatedParseError(str.str()); - }; - - # Anything else means we're done. - any => { - fhold; - fbreak; - }; - *|; - - write data; - write init; - }%% - - try { - %% write exec; - } catch (LocatedParseError &error) { - if (ts >= ptr && ts <= pe) { - error.locate(ts - ptr + start); - } else { - error.locate(0); - } - throw; - } - - return p; -} - -} // namespace ue2 +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Parser for control verbs that can occur at the beginning of a pattern. + */ + +#include "parser/control_verbs.h" + +#include "parser/Parser.h" +#include "parser/parse_error.h" + +#include <cstring> +#include <sstream> + +using namespace std; + +namespace ue2 { + +const char *read_control_verbs(const char *ptr, const char *end, size_t start, + ParseMode &mode) { + const char *p = ptr; + const char *pe = end; + const char *eof = pe; + const char *ts, *te; + int cs; + UNUSED int act; + + %%{ + machine ControlVerbs; + + # Verbs that we recognise but do not support. + unhandledVerbs = '(*' ( + 'LIMIT_MATCH=' [0-9]+ | + 'LIMIT_RECURSION=' [0-9]+ | + 'NO_AUTO_POSSESS' | + 'NO_START_OPT' | + 'UTF16' | + 'UTF32' | + 'CR' | + 'LF' | + 'CRLF' | + 'ANYCRLF' | + 'ANY' | + 'BSR_ANYCRLF' | + 'BSR_UNICODE' + ) . ')'; + + main := |* + '(*UTF8)' | '(*UTF)' => { + mode.utf8 = true; + }; + + '(*UCP)' => { + mode.ucp = true; + }; + + unhandledVerbs => { + ostringstream str; + str << "Unsupported control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + '(*' [^)]+ ')' => { + ostringstream str; + str << "Unknown control verb " << string(ts, te - ts); + throw LocatedParseError(str.str()); + }; + + # Anything else means we're done. + any => { + fhold; + fbreak; + }; + *|; + + write data; + write init; + }%% + + try { + %% write exec; + } catch (LocatedParseError &error) { + if (ts >= ptr && ts <= pe) { + error.locate(ts - ptr + start); + } else { + error.locate(0); + } + throw; + } + + return p; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/parser/parse_error.cpp b/contrib/libs/hyperscan/src/parser/parse_error.cpp index e7f60b2645..7852c40015 100644 --- a/contrib/libs/hyperscan/src/parser/parse_error.cpp +++ b/contrib/libs/hyperscan/src/parser/parse_error.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -44,13 +44,13 @@ ParseError::~ParseError() {} LocatedParseError::~LocatedParseError() {} void LocatedParseError::locate(size_t offset) { - if (finalized) { - return; - } + if (finalized) { + return; + } std::ostringstream str; str << reason << " at index " << offset << "."; reason = str.str(); - finalized = true; + finalized = true; } } diff --git a/contrib/libs/hyperscan/src/parser/parse_error.h b/contrib/libs/hyperscan/src/parser/parse_error.h index 4556ed5e04..af6ba78c0a 100644 --- a/contrib/libs/hyperscan/src/parser/parse_error.h +++ b/contrib/libs/hyperscan/src/parser/parse_error.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ * \brief Parse/Compile exceptions. */ -#ifndef PARSE_ERROR_H -#define PARSE_ERROR_H +#ifndef PARSE_ERROR_H +#define PARSE_ERROR_H #include "util/compile_error.h" @@ -44,24 +44,24 @@ class ParseError : public CompileError { public: // Note: 'why' should describe why the error occurred and end with a // full stop, but no line break. - explicit ParseError(std::string why) : CompileError(std::move(why)) {} + explicit ParseError(std::string why) : CompileError(std::move(why)) {} ~ParseError() override; }; class LocatedParseError : public ParseError { public: - explicit LocatedParseError(std::string why) : ParseError(".") { - reason = std::move(why); // don't use ParseError ctor + explicit LocatedParseError(std::string why) : ParseError(".") { + reason = std::move(why); // don't use ParseError ctor } ~LocatedParseError() override; void locate(size_t offset); -private: - bool finalized = false; //!< true when locate() has been called. +private: + bool finalized = false; //!< true when locate() has been called. }; } // namespace ue2 -#endif /* PARSE_ERROR_H */ +#endif /* PARSE_ERROR_H */ diff --git a/contrib/libs/hyperscan/src/parser/prefilter.cpp b/contrib/libs/hyperscan/src/parser/prefilter.cpp index f69362e4e3..96292aed73 100644 --- a/contrib/libs/hyperscan/src/parser/prefilter.cpp +++ b/contrib/libs/hyperscan/src/parser/prefilter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,9 +80,9 @@ public: return numPositions <= MAX_REFERENT_POSITIONS; } - using DefaultConstComponentVisitor::pre; - using DefaultConstComponentVisitor::post; - + using DefaultConstComponentVisitor::pre; + using DefaultConstComponentVisitor::post; + void pre(const AsciiComponentClass &) override { numPositions++; } @@ -167,8 +167,8 @@ public: explicit FindSequenceVisitor(unsigned ref_id) : id(ref_id) {} explicit FindSequenceVisitor(const std::string &s) : name(s) {} - using DefaultConstComponentVisitor::pre; - + using DefaultConstComponentVisitor::pre; + void pre(const ComponentSequence &c) override { if (!name.empty()) { if (c.getCaptureName() == name) { @@ -208,8 +208,8 @@ public: PrefilterVisitor(Component *c, const ParseMode &m) : root(c), mode(m) {} ~PrefilterVisitor() override; - using DefaultComponentVisitor::visit; - + using DefaultComponentVisitor::visit; + /** \brief Calls the visitor (recursively) on a new replacement component * we've just created. Takes care of freeing it if the sequence is itself * replaced. */ @@ -295,16 +295,16 @@ public: Component *visit(ComponentWordBoundary *c) override { assert(c); - - // TODO: Right now, we do not have correct code for resolving these - // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we - // just replace with an empty sequence (as that will return a superset - // of matches). - if (mode.ucp && !mode.utf8) { - return new ComponentSequence(); - } - - // All other cases can be prefiltered. + + // TODO: Right now, we do not have correct code for resolving these + // when prefiltering is on, UCP is on, and UTF-8 is *off*. For now, we + // just replace with an empty sequence (as that will return a superset + // of matches). + if (mode.ucp && !mode.utf8) { + return new ComponentSequence(); + } + + // All other cases can be prefiltered. c->setPrefilter(true); return c; } diff --git a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp index a5d67f30d8..3f7150c2f1 100644 --- a/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp +++ b/contrib/libs/hyperscan/src/parser/shortcut_literal.cpp @@ -159,26 +159,26 @@ public: ConstructLiteralVisitor::~ConstructLiteralVisitor() {} /** \brief True if the literal expression \a expr could be added to Rose. */ -bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { - assert(pe.component); +bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { + assert(pe.component); - if (!ng.cc.grey.allowLiteral) { + if (!ng.cc.grey.allowLiteral) { return false; } - const auto &expr = pe.expr; - + const auto &expr = pe.expr; + // XXX: don't shortcut literals with extended params (yet) - if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || - expr.edit_distance || expr.hamm_distance) { + if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || + expr.edit_distance || expr.hamm_distance) { DEBUG_PRINTF("extended params not allowed\n"); return false; } ConstructLiteralVisitor vis; try { - assert(pe.component); - pe.component->accept(vis); + assert(pe.component); + pe.component->accept(vis); assert(vis.repeat_stack.empty()); } catch (const ConstructLiteralVisitor::NotLiteral&) { DEBUG_PRINTF("not a literal\n"); @@ -198,7 +198,7 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { } DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); - return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, + return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, expr.som, expr.quiet); } diff --git a/contrib/libs/hyperscan/src/parser/ucp_table.cpp b/contrib/libs/hyperscan/src/parser/ucp_table.cpp index fc1330fe7f..dc7474a374 100644 --- a/contrib/libs/hyperscan/src/parser/ucp_table.cpp +++ b/contrib/libs/hyperscan/src/parser/ucp_table.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -83,13 +83,13 @@ void make_caseless(CodePointSet *cps) { CodePointSet base = *cps; - auto uc_begin = begin(ucp_caseless_def); - auto uc_end = end(ucp_caseless_def); - DEBUG_PRINTF("uc len %zd\n", distance(uc_begin, uc_end)); + auto uc_begin = begin(ucp_caseless_def); + auto uc_end = end(ucp_caseless_def); + DEBUG_PRINTF("uc len %zd\n", distance(uc_begin, uc_end)); - for (const auto &elem : base) { - unichar b = lower(elem); - unichar e = upper(elem) + 1; + for (const auto &elem : base) { + unichar b = lower(elem); + unichar e = upper(elem) + 1; for (; b < e; b++) { DEBUG_PRINTF("decasing %x\n", b); @@ -100,7 +100,7 @@ void make_caseless(CodePointSet *cps) { DEBUG_PRINTF("EOL\n"); return; } - while (uc_begin != uc_end && uc_begin->base == b) { + while (uc_begin != uc_end && uc_begin->base == b) { DEBUG_PRINTF("at {%x,%x}\n", uc_begin->base, uc_begin->caseless); cps->set(uc_begin->caseless); ++uc_begin; @@ -116,12 +116,12 @@ void make_caseless(CodePointSet *cps) { bool flip_case(unichar *c) { assert(c); - const unicase test = { *c, 0 }; + const unicase test = { *c, 0 }; - const auto uc_begin = begin(ucp_caseless_def); - const auto uc_end = end(ucp_caseless_def); - const auto f = lower_bound(uc_begin, uc_end, test); - if (f != uc_end && f->base == *c) { + const auto uc_begin = begin(ucp_caseless_def); + const auto uc_end = end(ucp_caseless_def); + const auto f = lower_bound(uc_begin, uc_end, test); + if (f != uc_end && f->base == *c) { DEBUG_PRINTF("flipped c=%x to %x\n", *c, f->caseless); *c = f->caseless; return true; diff --git a/contrib/libs/hyperscan/src/parser/unsupported.cpp b/contrib/libs/hyperscan/src/parser/unsupported.cpp index c4b18b6a30..7e515c9028 100644 --- a/contrib/libs/hyperscan/src/parser/unsupported.cpp +++ b/contrib/libs/hyperscan/src/parser/unsupported.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,7 +45,7 @@ namespace ue2 { class UnsupportedVisitor : public DefaultConstComponentVisitor { public: ~UnsupportedVisitor() override; - using DefaultConstComponentVisitor::pre; + using DefaultConstComponentVisitor::pre; void pre(const ComponentAssertion &) override { throw ParseError("Zero-width assertions are not supported."); } |