aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/compiler/compiler.cpp
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:10 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:10 +0300
commit1aeb9a455974457866f78722ad98114bafc84e8a (patch)
treee4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/compiler/compiler.cpp
parentbd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
downloadydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/compiler/compiler.cpp')
-rw-r--r--contrib/libs/hyperscan/src/compiler/compiler.cpp240
1 files changed, 120 insertions, 120 deletions
diff --git a/contrib/libs/hyperscan/src/compiler/compiler.cpp b/contrib/libs/hyperscan/src/compiler/compiler.cpp
index 5751bd64f4..86974f9281 100644
--- a/contrib/libs/hyperscan/src/compiler/compiler.cpp
+++ b/contrib/libs/hyperscan/src/compiler/compiler.cpp
@@ -29,10 +29,10 @@
/** \file
* \brief Compiler front-end interface.
*/
-#include "allocator.h"
+#include "allocator.h"
#include "asserts.h"
#include "compiler.h"
-#include "crc32.h"
+#include "crc32.h"
#include "database.h"
#include "grey.h"
#include "hs_internal.h"
@@ -58,7 +58,7 @@
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
#include "som/slot_manager_dump.h"
-#include "util/bytecode_ptr.h"
+#include "util/bytecode_ptr.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
@@ -80,9 +80,9 @@ static
void validateExt(const hs_expr_ext &ext) {
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
HS_EXT_FLAG_MAX_OFFSET |
- HS_EXT_FLAG_MIN_LENGTH |
- HS_EXT_FLAG_EDIT_DISTANCE |
- HS_EXT_FLAG_HAMMING_DISTANCE;
+ HS_EXT_FLAG_MIN_LENGTH |
+ HS_EXT_FLAG_EDIT_DISTANCE |
+ HS_EXT_FLAG_HAMMING_DISTANCE;
if (ext.flags & ~ALL_EXT_FLAGS) {
throw CompileError("Invalid hs_expr_ext flag set.");
}
@@ -100,13 +100,13 @@ void validateExt(const hs_expr_ext &ext) {
throw CompileError("In hs_expr_ext, min_length must be less than or "
"equal to max_offset.");
}
-
- if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) &&
- (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) {
- throw CompileError("In hs_expr_ext, cannot have both edit distance and "
- "Hamming distance.");
- }
-
+
+ if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) &&
+ (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) {
+ throw CompileError("In hs_expr_ext, cannot have both edit distance and "
+ "Hamming distance.");
+ }
+
}
void ParsedLitExpression::parseLiteral(const char *expression, size_t len,
@@ -150,10 +150,10 @@ ParsedLitExpression::ParsedLitExpression(unsigned index_in,
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
- unsigned flags, ReportID report,
+ unsigned flags, ReportID report,
const hs_expr_ext *ext)
- : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
- false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
+ : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
+ false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
0, 0, 0, flags & HS_FLAG_QUIET) {
// We disallow SOM + Quiet.
if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) {
@@ -165,7 +165,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
component = parse(expression, mode);
- expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
+ expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
const size_t len = strlen(expression);
if (expr.utf8 && !isValidUtf8(expression, len)) {
@@ -196,7 +196,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
- expr.som = SOM_LEFT;
+ expr.som = SOM_LEFT;
}
// Set extended parameters, if we have them.
@@ -205,32 +205,32 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
validateExt(*ext);
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
- expr.min_offset = ext->min_offset;
+ expr.min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
- expr.max_offset = ext->max_offset;
+ expr.max_offset = ext->max_offset;
}
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
- expr.min_length = ext->min_length;
- }
- if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
- expr.edit_distance = ext->edit_distance;
- }
- if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
- expr.hamm_distance = ext->hamming_distance;
+ expr.min_length = ext->min_length;
}
+ if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
+ expr.edit_distance = ext->edit_distance;
+ }
+ if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) {
+ expr.hamm_distance = ext->hamming_distance;
+ }
}
// These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold.
- assert(expr.max_offset >= expr.min_offset);
- assert(expr.max_offset >= expr.min_length);
+ assert(expr.max_offset >= expr.min_offset);
+ assert(expr.max_offset >= expr.min_length);
// Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well.
- if (flags & HS_FLAG_PREFILTER && expr.min_length) {
+ if (flags & HS_FLAG_PREFILTER && expr.min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
- expr.min_length = 0;
+ expr.min_length = 0;
}
}
@@ -239,25 +239,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode.
*/
-void dumpExpression(UNUSED const ParsedExpression &pe,
+void dumpExpression(UNUSED const ParsedExpression &pe,
UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
- DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
- pe.expr.report, pe.expr.index);
+ DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
+ pe.expr.report, pe.expr.index);
ostringstream debug_tree;
- dumpTree(debug_tree, pe.component.get());
+ dumpTree(debug_tree, pe.component.get());
printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG
#if defined(DUMP_SUPPORT)
if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss;
- ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
+ ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
<< stage << ".txt";
ofstream out(ss.str().c_str());
- out << "Component Tree for " << pe.expr.report << endl;
- dumpTree(out, pe.component.get());
- if (pe.expr.utf8) {
+ out << "Component Tree for " << pe.expr.report << endl;
+ dumpTree(out, pe.component.get());
+ if (pe.expr.utf8) {
out << "UTF8 mode" << endl;
}
}
@@ -267,13 +267,13 @@ void dumpExpression(UNUSED const ParsedExpression &pe,
/** \brief Run Component tree optimisations on \a expr. */
static
-void optimise(ParsedExpression &pe) {
- if (pe.expr.min_length || pe.expr.som) {
+void optimise(ParsedExpression &pe) {
+ if (pe.expr.min_length || pe.expr.som) {
return;
}
DEBUG_PRINTF("optimising\n");
- pe.component->optimise(true /* root is connected to sds */);
+ pe.component->optimise(true /* root is connected to sds */);
}
void addExpression(NG &ng, unsigned index, const char *expression,
@@ -329,34 +329,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// Do per-expression processing: errors here will result in an exception
// being thrown up to our caller
- ParsedExpression pe(index, expression, flags, id, ext);
- dumpExpression(pe, "orig", cc.grey);
+ ParsedExpression pe(index, expression, flags, id, ext);
+ dumpExpression(pe, "orig", cc.grey);
// Apply prefiltering transformations if desired.
- if (pe.expr.prefilter) {
- prefilterTree(pe.component, ParseMode(flags));
- dumpExpression(pe, "prefiltered", cc.grey);
+ if (pe.expr.prefilter) {
+ prefilterTree(pe.component, ParseMode(flags));
+ dumpExpression(pe, "prefiltered", cc.grey);
}
// Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError exception
// if the component tree contains such a construct.
- checkUnsupported(*pe.component);
+ checkUnsupported(*pe.component);
- pe.component->checkEmbeddedStartAnchor(true);
- pe.component->checkEmbeddedEndAnchor(true);
+ pe.component->checkEmbeddedStartAnchor(true);
+ pe.component->checkEmbeddedEndAnchor(true);
if (cc.grey.optimiseComponentTree) {
- optimise(pe);
- dumpExpression(pe, "opt", cc.grey);
+ optimise(pe);
+ dumpExpression(pe, "opt", cc.grey);
}
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
- pe.component.get(), pe.expr.index, pe.expr.report);
+ pe.component.get(), pe.expr.index, pe.expr.report);
// You can only use the SOM flags if you've also specified an SOM
// precision mode.
- if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
+ if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
throw CompileError("To use a SOM expression flag in streaming mode, "
"an SOM precision mode (e.g. "
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
@@ -364,25 +364,25 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// If this expression is a literal, we can feed it directly to Rose rather
// than building the NFA graph.
- if (shortcutLiteral(ng, pe)) {
+ if (shortcutLiteral(ng, pe)) {
DEBUG_PRINTF("took literal short cut\n");
return;
}
- auto built_expr = buildGraph(ng.rm, cc, pe);
- if (!built_expr.g) {
+ auto built_expr = buildGraph(ng.rm, cc, pe);
+ if (!built_expr.g) {
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
- "thrown.\n", pe.expr.report);
+ "thrown.\n", pe.expr.report);
throw CompileError("Internal error.");
}
- if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
+ if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) {
throw CompileError("Pattern matches empty buffer; use "
"HS_FLAG_ALLOWEMPTY to enable support.");
}
- if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
- DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
+ if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) {
+ DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
throw CompileError("Error compiling expression.");
}
}
@@ -430,7 +430,7 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
}
static
-bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
+bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) {
const u32 minWidth =
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
auto rose = ng.rose->buildRose(minWidth);
@@ -455,54 +455,54 @@ platform_t target_to_platform(const target_t &target_info) {
if (!target_info.has_avx2()) {
p |= HS_PLATFORM_NOAVX2;
}
- if (!target_info.has_avx512()) {
- p |= HS_PLATFORM_NOAVX512;
- }
+ if (!target_info.has_avx512()) {
+ p |= HS_PLATFORM_NOAVX512;
+ }
if (!target_info.has_avx512vbmi()) {
p |= HS_PLATFORM_NOAVX512VBMI;
}
return p;
}
-/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
- * \ref hs_database, ensuring that it is padded correctly to give cacheline
- * alignment. */
-static
-hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
- size_t db_len = sizeof(struct hs_database) + len;
- DEBUG_PRINTF("db size %zu\n", db_len);
- DEBUG_PRINTF("db platform %llx\n", platform);
-
- struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
- if (hs_check_alloc(db) != HS_SUCCESS) {
- hs_database_free(db);
- return nullptr;
- }
-
- // So that none of our database is uninitialized
- memset(db, 0, db_len);
-
- // we need to align things manually
- size_t shift = (uintptr_t)db->bytes & 0x3f;
- DEBUG_PRINTF("shift is %zu\n", shift);
-
- db->bytecode = offsetof(struct hs_database, bytes) - shift;
- char *bytecode = (char *)db + db->bytecode;
- assert(ISALIGNED_CL(bytecode));
-
- db->magic = HS_DB_MAGIC;
- db->version = HS_DB_VERSION;
- db->length = len;
- db->platform = platform;
-
- // Copy bytecode
- memcpy(bytecode, in_bytecode, len);
-
- db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
- return db;
-}
-
-
+/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
+ * \ref hs_database, ensuring that it is padded correctly to give cacheline
+ * alignment. */
+static
+hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
+ size_t db_len = sizeof(struct hs_database) + len;
+ DEBUG_PRINTF("db size %zu\n", db_len);
+ DEBUG_PRINTF("db platform %llx\n", platform);
+
+ struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
+ if (hs_check_alloc(db) != HS_SUCCESS) {
+ hs_database_free(db);
+ return nullptr;
+ }
+
+ // So that none of our database is uninitialized
+ memset(db, 0, db_len);
+
+ // we need to align things manually
+ size_t shift = (uintptr_t)db->bytes & 0x3f;
+ DEBUG_PRINTF("shift is %zu\n", shift);
+
+ db->bytecode = offsetof(struct hs_database, bytes) - shift;
+ char *bytecode = (char *)db + db->bytecode;
+ assert(ISALIGNED_CL(bytecode));
+
+ db->magic = HS_DB_MAGIC;
+ db->version = HS_DB_VERSION;
+ db->length = len;
+ db->platform = platform;
+
+ // Copy bytecode
+ memcpy(bytecode, in_bytecode, len);
+
+ db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
+ return db;
+}
+
+
struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
assert(length);
@@ -513,7 +513,7 @@ struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) {
if (!rose) {
throw CompileError("Unable to generate bytecode.");
}
- *length = rose.size();
+ *length = rose.size();
if (!*length) {
DEBUG_PRINTF("RoseEngine has zero length\n");
assert(0);
@@ -594,42 +594,42 @@ bool isSupported(const Component &c) {
}
#endif
-BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
- const ParsedExpression &pe) {
- assert(isSupported(*pe.component));
+BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
+ const ParsedExpression &pe) {
+ assert(isSupported(*pe.component));
- const auto builder = makeNFABuilder(rm, cc, pe);
+ const auto builder = makeNFABuilder(rm, cc, pe);
assert(builder);
// Set up START and ACCEPT states; retrieve the special states
- const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
+ const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
// Map position IDs to characters/components
- pe.component->notePositions(*bs);
+ pe.component->notePositions(*bs);
// Wire the start dotstar state to the firsts
- connectInitialStates(*bs, pe);
+ connectInitialStates(*bs, pe);
DEBUG_PRINTF("wire up body of expr\n");
// Build the rest of the FOLLOW set
vector<PositionInfo> initials = {builder->getStartDotStar(),
builder->getStart()};
- pe.component->buildFollowSet(*bs, initials);
+ pe.component->buildFollowSet(*bs, initials);
// Wire the lasts to the accept state
- connectFinalStates(*bs, pe);
+ connectFinalStates(*bs, pe);
// Create our edges
bs->buildEdges();
- BuiltExpression built_expr = builder->getGraph();
- assert(built_expr.g);
+ BuiltExpression built_expr = builder->getGraph();
+ assert(built_expr.g);
- dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
- cc.grey);
- removeAssertVertices(rm, *built_expr.g, built_expr.expr);
+ dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
+ cc.grey);
+ removeAssertVertices(rm, *built_expr.g, built_expr.expr);
- return built_expr;
+ return built_expr;
}
} // namespace ue2