diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/compiler/compiler.cpp | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/compiler/compiler.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/compiler/compiler.cpp | 240 |
1 files changed, 120 insertions, 120 deletions
diff --git a/contrib/libs/hyperscan/src/compiler/compiler.cpp b/contrib/libs/hyperscan/src/compiler/compiler.cpp index 5751bd64f4..86974f9281 100644 --- a/contrib/libs/hyperscan/src/compiler/compiler.cpp +++ b/contrib/libs/hyperscan/src/compiler/compiler.cpp @@ -29,10 +29,10 @@ /** \file * \brief Compiler front-end interface. */ -#include "allocator.h" +#include "allocator.h" #include "asserts.h" #include "compiler.h" -#include "crc32.h" +#include "crc32.h" #include "database.h" #include "grey.h" #include "hs_internal.h" @@ -58,7 +58,7 @@ #include "rose/rose_build.h" #include "rose/rose_internal.h" #include "som/slot_manager_dump.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include "util/compile_error.h" #include "util/target_info.h" #include "util/verify_types.h" @@ -80,9 +80,9 @@ static void validateExt(const hs_expr_ext &ext) { static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | - HS_EXT_FLAG_MIN_LENGTH | - HS_EXT_FLAG_EDIT_DISTANCE | - HS_EXT_FLAG_HAMMING_DISTANCE; + HS_EXT_FLAG_MIN_LENGTH | + HS_EXT_FLAG_EDIT_DISTANCE | + HS_EXT_FLAG_HAMMING_DISTANCE; if (ext.flags & ~ALL_EXT_FLAGS) { throw CompileError("Invalid hs_expr_ext flag set."); } @@ -100,13 +100,13 @@ void validateExt(const hs_expr_ext &ext) { throw CompileError("In hs_expr_ext, min_length must be less than or " "equal to max_offset."); } - - if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) && - (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) { - throw CompileError("In hs_expr_ext, cannot have both edit distance and " - "Hamming distance."); - } - + + if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) && + (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) { + throw CompileError("In hs_expr_ext, cannot have both edit distance and " + "Hamming distance."); + } + } void ParsedLitExpression::parseLiteral(const char *expression, size_t len, @@ -150,10 +150,10 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in, } ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, - unsigned flags, ReportID report, + unsigned flags, ReportID report, const hs_expr_ext *ext) - : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, - false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, + : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, + false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, flags & HS_FLAG_QUIET) { // We disallow SOM + Quiet. if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) { @@ -165,7 +165,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, component = parse(expression, mode); - expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ + expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ const size_t len = strlen(expression); if (expr.utf8 && !isValidUtf8(expression, len)) { @@ -196,7 +196,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, // Set SOM type. if (flags & HS_FLAG_SOM_LEFTMOST) { - expr.som = SOM_LEFT; + expr.som = SOM_LEFT; } // Set extended parameters, if we have them. @@ -205,32 +205,32 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, validateExt(*ext); if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { - expr.min_offset = ext->min_offset; + expr.min_offset = ext->min_offset; } if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { - expr.max_offset = ext->max_offset; + expr.max_offset = ext->max_offset; } if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { - expr.min_length = ext->min_length; - } - if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { - expr.edit_distance = ext->edit_distance; - } - if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) { - expr.hamm_distance = ext->hamming_distance; + expr.min_length = ext->min_length; } + if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { + expr.edit_distance = ext->edit_distance; + } + if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) { + expr.hamm_distance = ext->hamming_distance; + } } // These are validated in validateExt, so an error will already have been // thrown if these conditions don't hold. - assert(expr.max_offset >= expr.min_offset); - assert(expr.max_offset >= expr.min_length); + assert(expr.max_offset >= expr.min_offset); + assert(expr.max_offset >= expr.min_length); // Since prefiltering and SOM aren't supported together, we must squash any // min_length constraint as well. - if (flags & HS_FLAG_PREFILTER && expr.min_length) { + if (flags & HS_FLAG_PREFILTER && expr.min_length) { DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n"); - expr.min_length = 0; + expr.min_length = 0; } } @@ -239,25 +239,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, * \brief Dumps the parse tree to screen in debug mode and to disk in dump * mode. */ -void dumpExpression(UNUSED const ParsedExpression &pe, +void dumpExpression(UNUSED const ParsedExpression &pe, UNUSED const char *stage, UNUSED const Grey &grey) { #if defined(DEBUG) - DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n", - pe.expr.report, pe.expr.index); + DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n", + pe.expr.report, pe.expr.index); ostringstream debug_tree; - dumpTree(debug_tree, pe.component.get()); + dumpTree(debug_tree, pe.component.get()); printf("%s\n", debug_tree.str().c_str()); #endif // DEBUG #if defined(DUMP_SUPPORT) if (grey.dumpFlags & Grey::DUMP_PARSE) { stringstream ss; - ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_" + ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_" << stage << ".txt"; ofstream out(ss.str().c_str()); - out << "Component Tree for " << pe.expr.report << endl; - dumpTree(out, pe.component.get()); - if (pe.expr.utf8) { + out << "Component Tree for " << pe.expr.report << endl; + dumpTree(out, pe.component.get()); + if (pe.expr.utf8) { out << "UTF8 mode" << endl; } } @@ -267,13 +267,13 @@ void dumpExpression(UNUSED const ParsedExpression &pe, /** \brief Run Component tree optimisations on \a expr. */ static -void optimise(ParsedExpression &pe) { - if (pe.expr.min_length || pe.expr.som) { +void optimise(ParsedExpression &pe) { + if (pe.expr.min_length || pe.expr.som) { return; } DEBUG_PRINTF("optimising\n"); - pe.component->optimise(true /* root is connected to sds */); + pe.component->optimise(true /* root is connected to sds */); } void addExpression(NG &ng, unsigned index, const char *expression, @@ -329,34 +329,34 @@ void addExpression(NG &ng, unsigned index, const char *expression, // Do per-expression processing: errors here will result in an exception // being thrown up to our caller - ParsedExpression pe(index, expression, flags, id, ext); - dumpExpression(pe, "orig", cc.grey); + ParsedExpression pe(index, expression, flags, id, ext); + dumpExpression(pe, "orig", cc.grey); // Apply prefiltering transformations if desired. - if (pe.expr.prefilter) { - prefilterTree(pe.component, ParseMode(flags)); - dumpExpression(pe, "prefiltered", cc.grey); + if (pe.expr.prefilter) { + prefilterTree(pe.component, ParseMode(flags)); + dumpExpression(pe, "prefiltered", cc.grey); } // Expressions containing zero-width assertions and other extended pcre // types aren't supported yet. This call will throw a ParseError exception // if the component tree contains such a construct. - checkUnsupported(*pe.component); + checkUnsupported(*pe.component); - pe.component->checkEmbeddedStartAnchor(true); - pe.component->checkEmbeddedEndAnchor(true); + pe.component->checkEmbeddedStartAnchor(true); + pe.component->checkEmbeddedEndAnchor(true); if (cc.grey.optimiseComponentTree) { - optimise(pe); - dumpExpression(pe, "opt", cc.grey); + optimise(pe); + dumpExpression(pe, "opt", cc.grey); } DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n", - pe.component.get(), pe.expr.index, pe.expr.report); + pe.component.get(), pe.expr.index, pe.expr.report); // You can only use the SOM flags if you've also specified an SOM // precision mode. - if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { + if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { throw CompileError("To use a SOM expression flag in streaming mode, " "an SOM precision mode (e.g. " "HS_MODE_SOM_HORIZON_LARGE) must be specified."); @@ -364,25 +364,25 @@ void addExpression(NG &ng, unsigned index, const char *expression, // If this expression is a literal, we can feed it directly to Rose rather // than building the NFA graph. - if (shortcutLiteral(ng, pe)) { + if (shortcutLiteral(ng, pe)) { DEBUG_PRINTF("took literal short cut\n"); return; } - auto built_expr = buildGraph(ng.rm, cc, pe); - if (!built_expr.g) { + auto built_expr = buildGraph(ng.rm, cc, pe); + if (!built_expr.g) { DEBUG_PRINTF("NFA build failed on ID %u, but no exception was " - "thrown.\n", pe.expr.report); + "thrown.\n", pe.expr.report); throw CompileError("Internal error."); } - if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) { + if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) { throw CompileError("Pattern matches empty buffer; use " "HS_FLAG_ALLOWEMPTY to enable support."); } - if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) { - DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report); + if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) { + DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report); throw CompileError("Error compiling expression."); } } @@ -430,7 +430,7 @@ void addLitExpression(NG &ng, unsigned index, const char *expression, } static -bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { +bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { const u32 minWidth = ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF; auto rose = ng.rose->buildRose(minWidth); @@ -455,54 +455,54 @@ platform_t target_to_platform(const target_t &target_info) { if (!target_info.has_avx2()) { p |= HS_PLATFORM_NOAVX2; } - if (!target_info.has_avx512()) { - p |= HS_PLATFORM_NOAVX512; - } + if (!target_info.has_avx512()) { + p |= HS_PLATFORM_NOAVX512; + } if (!target_info.has_avx512vbmi()) { p |= HS_PLATFORM_NOAVX512VBMI; } return p; } -/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated - * \ref hs_database, ensuring that it is padded correctly to give cacheline - * alignment. */ -static -hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { - size_t db_len = sizeof(struct hs_database) + len; - DEBUG_PRINTF("db size %zu\n", db_len); - DEBUG_PRINTF("db platform %llx\n", platform); - - struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); - if (hs_check_alloc(db) != HS_SUCCESS) { - hs_database_free(db); - return nullptr; - } - - // So that none of our database is uninitialized - memset(db, 0, db_len); - - // we need to align things manually - size_t shift = (uintptr_t)db->bytes & 0x3f; - DEBUG_PRINTF("shift is %zu\n", shift); - - db->bytecode = offsetof(struct hs_database, bytes) - shift; - char *bytecode = (char *)db + db->bytecode; - assert(ISALIGNED_CL(bytecode)); - - db->magic = HS_DB_MAGIC; - db->version = HS_DB_VERSION; - db->length = len; - db->platform = platform; - - // Copy bytecode - memcpy(bytecode, in_bytecode, len); - - db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); - return db; -} - - +/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated + * \ref hs_database, ensuring that it is padded correctly to give cacheline + * alignment. */ +static +hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { + size_t db_len = sizeof(struct hs_database) + len; + DEBUG_PRINTF("db size %zu\n", db_len); + DEBUG_PRINTF("db platform %llx\n", platform); + + struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); + if (hs_check_alloc(db) != HS_SUCCESS) { + hs_database_free(db); + return nullptr; + } + + // So that none of our database is uninitialized + memset(db, 0, db_len); + + // we need to align things manually + size_t shift = (uintptr_t)db->bytes & 0x3f; + DEBUG_PRINTF("shift is %zu\n", shift); + + db->bytecode = offsetof(struct hs_database, bytes) - shift; + char *bytecode = (char *)db + db->bytecode; + assert(ISALIGNED_CL(bytecode)); + + db->magic = HS_DB_MAGIC; + db->version = HS_DB_VERSION; + db->length = len; + db->platform = platform; + + // Copy bytecode + memcpy(bytecode, in_bytecode, len); + + db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); + return db; +} + + struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) { assert(length); @@ -513,7 +513,7 @@ struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) { if (!rose) { throw CompileError("Unable to generate bytecode."); } - *length = rose.size(); + *length = rose.size(); if (!*length) { DEBUG_PRINTF("RoseEngine has zero length\n"); assert(0); @@ -594,42 +594,42 @@ bool isSupported(const Component &c) { } #endif -BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, - const ParsedExpression &pe) { - assert(isSupported(*pe.component)); +BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &pe) { + assert(isSupported(*pe.component)); - const auto builder = makeNFABuilder(rm, cc, pe); + const auto builder = makeNFABuilder(rm, cc, pe); assert(builder); // Set up START and ACCEPT states; retrieve the special states - const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter); + const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter); // Map position IDs to characters/components - pe.component->notePositions(*bs); + pe.component->notePositions(*bs); // Wire the start dotstar state to the firsts - connectInitialStates(*bs, pe); + connectInitialStates(*bs, pe); DEBUG_PRINTF("wire up body of expr\n"); // Build the rest of the FOLLOW set vector<PositionInfo> initials = {builder->getStartDotStar(), builder->getStart()}; - pe.component->buildFollowSet(*bs, initials); + pe.component->buildFollowSet(*bs, initials); // Wire the lasts to the accept state - connectFinalStates(*bs, pe); + connectFinalStates(*bs, pe); // Create our edges bs->buildEdges(); - BuiltExpression built_expr = builder->getGraph(); - assert(built_expr.g); + BuiltExpression built_expr = builder->getGraph(); + assert(built_expr.g); - dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts", - cc.grey); - removeAssertVertices(rm, *built_expr.g, built_expr.expr); + dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts", + cc.grey); + removeAssertVertices(rm, *built_expr.g, built_expr.expr); - return built_expr; + return built_expr; } } // namespace ue2 |