aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/rose/rose_build_program.cpp
diff options
context:
space:
mode:
authorthegeorg <thegeorg@yandex-team.ru>2022-02-10 16:45:12 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:12 +0300
commit49116032d905455a7b1c994e4a696afc885c1e71 (patch)
treebe835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/hyperscan/src/rose/rose_build_program.cpp
parent4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff)
downloadydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_program.cpp')
-rw-r--r--contrib/libs/hyperscan/src/rose/rose_build_program.cpp578
1 files changed, 289 insertions, 289 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp
index 7672abe16f..7d1d7ecbb5 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, Intel Corporation
+ * Copyright (c) 2016-2020, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -95,7 +95,7 @@ OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
}
RoseProgram::RoseProgram() {
- prog.push_back(std::make_unique<RoseInstrEnd>());
+ prog.push_back(std::make_unique<RoseInstrEnd>());
}
RoseProgram::~RoseProgram() = default;
@@ -280,7 +280,7 @@ void stripCheckHandledInstruction(RoseProgram &prog) {
}
-/** Returns true if the program may read the interpreter's work_done flag */
+/** Returns true if the program may read the interpreter's work_done flag */
static
bool reads_work_done_flag(const RoseProgram &prog) {
for (const auto &ri : prog) {
@@ -297,30 +297,30 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
}
RoseProgram block;
- block.add_before_end(std::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
+ block.add_before_end(std::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
program.add_block(move(block));
}
void addSuffixesEodProgram(RoseProgram &program) {
RoseProgram block;
- block.add_before_end(std::make_unique<RoseInstrSuffixesEod>());
+ block.add_before_end(std::make_unique<RoseInstrSuffixesEod>());
program.add_block(move(block));
}
void addMatcherEodProgram(RoseProgram &program) {
RoseProgram block;
- block.add_before_end(std::make_unique<RoseInstrMatcherEod>());
+ block.add_before_end(std::make_unique<RoseInstrMatcherEod>());
program.add_block(move(block));
}
-void addFlushCombinationProgram(RoseProgram &program) {
- program.add_before_end(std::make_unique<RoseInstrFlushCombination>());
-}
-
-void addLastFlushCombinationProgram(RoseProgram &program) {
- program.add_before_end(std::make_unique<RoseInstrLastFlushCombination>());
-}
-
+void addFlushCombinationProgram(RoseProgram &program) {
+ program.add_before_end(std::make_unique<RoseInstrFlushCombination>());
+}
+
+void addLastFlushCombinationProgram(RoseProgram &program) {
+ program.add_before_end(std::make_unique<RoseInstrLastFlushCombination>());
+}
+
static
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info,
@@ -428,7 +428,7 @@ void makeCatchup(const ReportManager &rm, bool needs_catchup,
return;
}
- program.add_before_end(std::make_unique<RoseInstrCatchUp>());
+ program.add_before_end(std::make_unique<RoseInstrCatchUp>());
}
static
@@ -505,23 +505,23 @@ void writeSomOperation(const Report &report, som_operation *op) {
}
static
-void addLogicalSetRequired(const Report &report, ReportManager &rm,
- RoseProgram &program) {
- if (report.lkey == INVALID_LKEY) {
- return;
- }
- // set matching status of current lkey
- auto risl = std::make_unique<RoseInstrSetLogical>(report.lkey,
- report.offsetAdjust);
- program.add_before_end(move(risl));
- // set current lkey's corresponding ckeys active, pending to check
- for (auto ckey : rm.getRelateCKeys(report.lkey)) {
- auto risc = std::make_unique<RoseInstrSetCombination>(ckey);
- program.add_before_end(move(risc));
- }
-}
-
-static
+void addLogicalSetRequired(const Report &report, ReportManager &rm,
+ RoseProgram &program) {
+ if (report.lkey == INVALID_LKEY) {
+ return;
+ }
+ // set matching status of current lkey
+ auto risl = std::make_unique<RoseInstrSetLogical>(report.lkey,
+ report.offsetAdjust);
+ program.add_before_end(move(risl));
+ // set current lkey's corresponding ckeys active, pending to check
+ for (auto ckey : rm.getRelateCKeys(report.lkey)) {
+ auto risc = std::make_unique<RoseInstrSetCombination>(ckey);
+ program.add_before_end(move(risc));
+ }
+}
+
+static
void makeReport(const RoseBuildImpl &build, const ReportID id,
const bool has_som, RoseProgram &program) {
assert(id < build.rm.numReports());
@@ -562,67 +562,67 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
}
if (report.quashSom) {
- report_block.add_before_end(std::make_unique<RoseInstrSomZero>());
+ report_block.add_before_end(std::make_unique<RoseInstrSomZero>());
}
switch (report.type) {
case EXTERNAL_CALLBACK:
- if (build.rm.numCkeys()) {
- addFlushCombinationProgram(report_block);
- }
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
if (!has_som) {
// Dedupe is only necessary if this report has a dkey, or if there
// are SOM reports to catch up.
bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
if (report.ekey == INVALID_EKEY) {
if (needs_dedupe) {
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrDedupeAndReport>(
- report.quashSom, build.rm.getDkey(report),
- report.onmatch, report.offsetAdjust, end_inst));
- } else {
- makeDedupe(build.rm, report, report_block);
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrDedupeAndReport>(
+ report.quashSom, build.rm.getDkey(report),
+ report.onmatch, report.offsetAdjust, end_inst));
+ } else {
+ makeDedupe(build.rm, report, report_block);
+ }
} else {
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrReport>(
- report.onmatch, report.offsetAdjust));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrReport>(
+ report.onmatch, report.offsetAdjust));
+ }
}
} else {
if (needs_dedupe) {
makeDedupe(build.rm, report, report_block);
}
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrReportExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
- } else {
- report_block.add_before_end(
- std::make_unique<RoseInstrSetExhaust>(report.ekey));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrReportExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
} else { // has_som
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- if (!report.quiet) {
- report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
- } else {
- report_block.add_before_end(
- std::make_unique<RoseInstrSetExhaust>(report.ekey));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
}
- addLogicalSetRequired(report, build.rm, report_block);
+ addLogicalSetRequired(report, build.rm, report_block);
break;
case INTERNAL_SOM_LOC_SET:
case INTERNAL_SOM_LOC_SET_IF_UNSET:
@@ -635,9 +635,9 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
case INTERNAL_SOM_LOC_MAKE_WRITABLE:
case INTERNAL_SOM_LOC_SET_FROM:
case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
- if (build.rm.numCkeys()) {
- addFlushCombinationProgram(report_block);
- }
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
if (has_som) {
auto ri = std::make_unique<RoseInstrReportSomAware>();
writeSomOperation(report, &ri->som);
@@ -649,7 +649,7 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
}
break;
case INTERNAL_ROSE_CHAIN: {
- report_block.add_before_end(std::make_unique<RoseInstrReportChain>(
+ report_block.add_before_end(std::make_unique<RoseInstrReportChain>(
report.onmatch, report.topSquashDistance));
break;
}
@@ -657,48 +657,48 @@ void makeReport(const RoseBuildImpl &build, const ReportID id,
case EXTERNAL_CALLBACK_SOM_STORED:
case EXTERNAL_CALLBACK_SOM_ABS:
case EXTERNAL_CALLBACK_SOM_REV_NFA:
- if (build.rm.numCkeys()) {
- addFlushCombinationProgram(report_block);
- }
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- if (!report.quiet) {
- report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
- } else {
- report_block.add_before_end(
- std::make_unique<RoseInstrSetExhaust>(report.ekey));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
- addLogicalSetRequired(report, build.rm, report_block);
+ addLogicalSetRequired(report, build.rm, report_block);
break;
case EXTERNAL_CALLBACK_SOM_PASS:
- if (build.rm.numCkeys()) {
- addFlushCombinationProgram(report_block);
- }
+ if (build.rm.numCkeys()) {
+ addFlushCombinationProgram(report_block);
+ }
makeDedupeSom(build.rm, report, report_block);
if (report.ekey == INVALID_EKEY) {
- if (!report.quiet) {
- report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
- report.onmatch, report.offsetAdjust));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(std::make_unique<RoseInstrReportSom>(
+ report.onmatch, report.offsetAdjust));
+ }
} else {
- if (!report.quiet) {
- report_block.add_before_end(
- std::make_unique<RoseInstrReportSomExhaust>(
- report.onmatch, report.offsetAdjust, report.ekey));
- } else {
- report_block.add_before_end(
- std::make_unique<RoseInstrSetExhaust>(report.ekey));
- }
+ if (!report.quiet) {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrReportSomExhaust>(
+ report.onmatch, report.offsetAdjust, report.ekey));
+ } else {
+ report_block.add_before_end(
+ std::make_unique<RoseInstrSetExhaust>(report.ekey));
+ }
}
- addLogicalSetRequired(report, build.rm, report_block);
+ addLogicalSetRequired(report, build.rm, report_block);
break;
default:
@@ -748,7 +748,7 @@ void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices,
if (it == end(roleStateIndices)) {
return;
}
- program.add_before_end(std::make_unique<RoseInstrSetState>(it->second));
+ program.add_before_end(std::make_unique<RoseInstrSetState>(it->second));
}
static
@@ -772,7 +772,7 @@ void makePushDelayedInstructions(const RoseLiteralMap &literals,
});
for (const auto &ri : delay_instructions) {
- program.add_before_end(std::make_unique<RoseInstrPushDelayed>(ri));
+ program.add_before_end(std::make_unique<RoseInstrPushDelayed>(ri));
}
}
@@ -924,7 +924,7 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
return;
}
- program.add_before_end(std::make_unique<RoseInstrSetGroups>(groups));
+ program.add_before_end(std::make_unique<RoseInstrSetGroups>(groups));
}
static
@@ -1061,49 +1061,49 @@ bool makeRoleMask32(const vector<LookEntry> &look,
return true;
}
-static
-bool makeRoleMask64(const vector<LookEntry> &look,
- RoseProgram &program, const target_t &target) {
- if (!target.has_avx512()) {
- return false;
- }
-
- if (look.back().offset >= look.front().offset + 64) {
- return false;
- }
- s32 base_offset = verify_s32(look.front().offset);
- array<u8, 64> and_mask, cmp_mask;
- and_mask.fill(0);
- cmp_mask.fill(0);
- u64a neg_mask = 0;
- for (const auto &entry : look) {
- u8 andmask_u8, cmpmask_u8, flip;
- if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
- return false;
- }
- u32 shift = entry.offset - base_offset;
- assert(shift < 64);
- and_mask[shift] = andmask_u8;
- cmp_mask[shift] = cmpmask_u8;
- if (flip) {
- neg_mask |= 1ULL << shift;
- }
- }
-
- DEBUG_PRINTF("and_mask %s\n",
- convertMaskstoString(and_mask.data(), 64).c_str());
- DEBUG_PRINTF("cmp_mask %s\n",
- convertMaskstoString(cmp_mask.data(), 64).c_str());
- DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
- DEBUG_PRINTF("base_offset %d\n", base_offset);
-
- const auto *end_inst = program.end_instruction();
- auto ri = std::make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask,
- base_offset, end_inst);
- program.add_before_end(move(ri));
- return true;
-}
-
+static
+bool makeRoleMask64(const vector<LookEntry> &look,
+ RoseProgram &program, const target_t &target) {
+ if (!target.has_avx512()) {
+ return false;
+ }
+
+ if (look.back().offset >= look.front().offset + 64) {
+ return false;
+ }
+ s32 base_offset = verify_s32(look.front().offset);
+ array<u8, 64> and_mask, cmp_mask;
+ and_mask.fill(0);
+ cmp_mask.fill(0);
+ u64a neg_mask = 0;
+ for (const auto &entry : look) {
+ u8 andmask_u8, cmpmask_u8, flip;
+ if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
+ return false;
+ }
+ u32 shift = entry.offset - base_offset;
+ assert(shift < 64);
+ and_mask[shift] = andmask_u8;
+ cmp_mask[shift] = cmpmask_u8;
+ if (flip) {
+ neg_mask |= 1ULL << shift;
+ }
+ }
+
+ DEBUG_PRINTF("and_mask %s\n",
+ convertMaskstoString(and_mask.data(), 64).c_str());
+ DEBUG_PRINTF("cmp_mask %s\n",
+ convertMaskstoString(cmp_mask.data(), 64).c_str());
+ DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
+ DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+ const auto *end_inst = program.end_instruction();
+ auto ri = std::make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask,
+ base_offset, end_inst);
+ program.add_before_end(move(ri));
+ return true;
+}
+
// Sorting by the size of every bucket.
// Used in map<u32, vector<s8>, cmpNibble>.
struct cmpNibble {
@@ -1127,7 +1127,7 @@ void getAllBuckets(const vector<LookEntry> &look,
} else {
neg_mask ^= 1ULL << (entry.offset - base_offset);
}
-
+
map <u16, u16> lo2hi;
// We treat Ascii Table as a 16x16 grid.
// Push every row in cr into lo2hi and mark the row number.
@@ -1281,7 +1281,7 @@ makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
(hi_mask, lo_mask, bucket_select_mask_32,
neg_mask & 0xffff, base_offset, end_inst);
}
-
+
static
unique_ptr<RoseInstruction>
makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
@@ -1300,83 +1300,83 @@ makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
}
static
-unique_ptr<RoseInstruction>
-makeCheckShufti64x8(u32 offset_range, u8 bucket_idx,
- const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
- const array<u8, 64> &bucket_select_mask,
- u64a neg_mask, s32 base_offset,
- const RoseInstruction *end_inst) {
- if (offset_range > 64 || bucket_idx > 8) {
- return nullptr;
- }
-
- array<u8, 64> hi_mask_64;
- array<u8, 64> lo_mask_64;
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin());
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 16);
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 32);
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 48);
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin());
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 16);
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 32);
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 48);
-
- return std::make_unique<RoseInstrCheckShufti64x8>
- (hi_mask_64, lo_mask_64, bucket_select_mask,
- neg_mask, base_offset, end_inst);
-}
-
-static
-unique_ptr<RoseInstruction>
-makeCheckShufti64x16(u32 offset_range, u8 bucket_idx,
- const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
- const array<u8, 64> &bucket_select_mask_lo,
- const array<u8, 64> &bucket_select_mask_hi,
- u64a neg_mask, s32 base_offset,
- const RoseInstruction *end_inst) {
- if (offset_range > 64 || bucket_idx > 16) {
- return nullptr;
- }
-
- array<u8, 64> hi_mask_1;
- array<u8, 64> hi_mask_2;
- array<u8, 64> lo_mask_1;
- array<u8, 64> lo_mask_2;
-
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin());
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16);
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32);
- copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48);
- copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin());
- copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16);
- copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32);
- copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48);
-
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin());
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16);
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32);
- copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48);
- copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin());
- copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16);
- copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32);
- copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48);
-
- return std::make_unique<RoseInstrCheckShufti64x16>
- (hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi,
- bucket_select_mask_lo, neg_mask, base_offset, end_inst);
-}
-
-static
-bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
- const target_t &target) {
- s32 offset_limit;
- if (target.has_avx512()) {
- offset_limit = 64;
- } else {
- offset_limit = 32;
- }
+unique_ptr<RoseInstruction>
+makeCheckShufti64x8(u32 offset_range, u8 bucket_idx,
+ const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+ const array<u8, 64> &bucket_select_mask,
+ u64a neg_mask, s32 base_offset,
+ const RoseInstruction *end_inst) {
+ if (offset_range > 64 || bucket_idx > 8) {
+ return nullptr;
+ }
+
+ array<u8, 64> hi_mask_64;
+ array<u8, 64> lo_mask_64;
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin());
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 16);
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 32);
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 48);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin());
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 16);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 32);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 48);
+
+ return std::make_unique<RoseInstrCheckShufti64x8>
+ (hi_mask_64, lo_mask_64, bucket_select_mask,
+ neg_mask, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti64x16(u32 offset_range, u8 bucket_idx,
+ const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+ const array<u8, 64> &bucket_select_mask_lo,
+ const array<u8, 64> &bucket_select_mask_hi,
+ u64a neg_mask, s32 base_offset,
+ const RoseInstruction *end_inst) {
+ if (offset_range > 64 || bucket_idx > 16) {
+ return nullptr;
+ }
+
+ array<u8, 64> hi_mask_1;
+ array<u8, 64> hi_mask_2;
+ array<u8, 64> lo_mask_1;
+ array<u8, 64> lo_mask_2;
+
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin());
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16);
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32);
+ copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48);
+ copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin());
+ copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16);
+ copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32);
+ copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48);
+
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin());
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32);
+ copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48);
+ copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin());
+ copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16);
+ copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32);
+ copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48);
+
+ return std::make_unique<RoseInstrCheckShufti64x16>
+ (hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi,
+ bucket_select_mask_lo, neg_mask, base_offset, end_inst);
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
+ const target_t &target) {
+ s32 offset_limit;
+ if (target.has_avx512()) {
+ offset_limit = 64;
+ } else {
+ offset_limit = 32;
+ }
s32 base_offset = verify_s32(look.front().offset);
- if (look.back().offset >= base_offset + offset_limit) {
+ if (look.back().offset >= base_offset + offset_limit) {
return false;
}
@@ -1384,40 +1384,40 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
u64a neg_mask_64;
array<u8, 32> hi_mask;
array<u8, 32> lo_mask;
- array<u8, 64> bucket_select_hi_64; // for AVX512
- array<u8, 64> bucket_select_lo_64; // for AVX512
+ array<u8, 64> bucket_select_hi_64; // for AVX512
+ array<u8, 64> bucket_select_lo_64; // for AVX512
array<u8, 32> bucket_select_hi;
array<u8, 32> bucket_select_lo;
hi_mask.fill(0);
lo_mask.fill(0);
- bucket_select_hi_64.fill(0);
- bucket_select_lo_64.fill(0);
+ bucket_select_hi_64.fill(0);
+ bucket_select_lo_64.fill(0);
bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
bucket_select_lo.fill(0);
- if (target.has_avx512()) {
- if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(),
- bucket_select_lo_64.data(), neg_mask_64, bucket_idx,
- 32)) {
- return false;
- }
- copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32,
- bucket_select_hi.begin());
- copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32,
- bucket_select_lo.begin());
-
- DEBUG_PRINTF("bucket_select_hi_64 %s\n",
- convertMaskstoString(bucket_select_hi_64.data(), 64).c_str());
- DEBUG_PRINTF("bucket_select_lo_64 %s\n",
- convertMaskstoString(bucket_select_lo_64.data(), 64).c_str());
- } else {
- if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
- bucket_select_lo.data(), neg_mask_64, bucket_idx,
- 32)) {
- return false;
- }
- }
-
+ if (target.has_avx512()) {
+ if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(),
+ bucket_select_lo_64.data(), neg_mask_64, bucket_idx,
+ 32)) {
+ return false;
+ }
+ copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32,
+ bucket_select_hi.begin());
+ copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32,
+ bucket_select_lo.begin());
+
+ DEBUG_PRINTF("bucket_select_hi_64 %s\n",
+ convertMaskstoString(bucket_select_hi_64.data(), 64).c_str());
+ DEBUG_PRINTF("bucket_select_lo_64 %s\n",
+ convertMaskstoString(bucket_select_lo_64.data(), 64).c_str());
+ } else {
+ if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
+ bucket_select_lo.data(), neg_mask_64, bucket_idx,
+ 32)) {
+ return false;
+ }
+ }
+
u32 neg_mask = (u32)neg_mask_64;
DEBUG_PRINTF("hi_mask %s\n",
@@ -1440,13 +1440,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
bucket_select_lo, neg_mask, base_offset,
end_inst);
}
- if (target.has_avx512()) {
- if (!ri) {
- ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask,
- bucket_select_lo_64, neg_mask_64,
- base_offset, end_inst);
- }
- }
+ if (target.has_avx512()) {
+ if (!ri) {
+ ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask,
+ bucket_select_lo_64, neg_mask_64,
+ base_offset, end_inst);
+ }
+ }
if (!ri) {
ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
bucket_select_lo, bucket_select_hi,
@@ -1457,13 +1457,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
bucket_select_lo, bucket_select_hi,
neg_mask, base_offset, end_inst);
}
- if (target.has_avx512()) {
- if (!ri) {
- ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask,
- bucket_select_lo_64, bucket_select_hi_64,
- neg_mask_64, base_offset, end_inst);
- }
- }
+ if (target.has_avx512()) {
+ if (!ri) {
+ ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask,
+ bucket_select_lo_64, bucket_select_hi_64,
+ neg_mask_64, base_offset, end_inst);
+ }
+ }
assert(ri);
program.add_before_end(move(ri));
@@ -1476,7 +1476,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program,
*/
static
void makeLookaroundInstruction(const vector<LookEntry> &look,
- RoseProgram &program, const target_t &target) {
+ RoseProgram &program, const target_t &target) {
assert(!look.empty());
if (makeRoleByte(look, program)) {
@@ -1500,14 +1500,14 @@ void makeLookaroundInstruction(const vector<LookEntry> &look,
return;
}
- if (makeRoleMask64(look, program, target)) {
+ if (makeRoleMask64(look, program, target)) {
+ return;
+ }
+
+ if (makeRoleShufti(look, program, target)) {
return;
}
- if (makeRoleShufti(look, program, target)) {
- return;
- }
-
auto ri = std::make_unique<RoseInstrCheckLookaround>(look,
program.end_instruction());
program.add_before_end(move(ri));
@@ -1545,7 +1545,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
return; // all caseful chars handled by HWLM mask.
}
- makeLookaroundInstruction(look, program, build.cc.target_info);
+ makeLookaroundInstruction(look, program, build.cc.target_info);
}
static
@@ -1584,7 +1584,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
const auto *end = prog.end_instruction();
- prog.add_before_end(std::make_unique<RoseInstrCheckLitEarly>(min_offset, end));
+ prog.add_before_end(std::make_unique<RoseInstrCheckLitEarly>(min_offset, end));
}
static
@@ -1595,7 +1595,7 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
if (!info.group_mask) {
return;
}
- prog.add_before_end(std::make_unique<RoseInstrCheckGroups>(info.group_mask));
+ prog.add_before_end(std::make_unique<RoseInstrCheckGroups>(info.group_mask));
}
static
@@ -1889,7 +1889,7 @@ void makeRoleLookaround(const RoseBuildImpl &build,
findLookaroundMasks(build, v, look_more);
mergeLookaround(look, look_more);
if (!look.empty()) {
- makeLookaroundInstruction(look, program, build.cc.target_info);
+ makeLookaroundInstruction(look, program, build.cc.target_info);
}
return;
}
@@ -1932,7 +1932,7 @@ void makeRoleSuffix(const RoseBuildImpl &build,
event = MQE_TOP;
}
- prog.add_before_end(std::make_unique<RoseInstrTriggerSuffix>(queue, event));
+ prog.add_before_end(std::make_unique<RoseInstrTriggerSuffix>(queue, event));
}
static
@@ -2039,7 +2039,7 @@ static
void addCheckOnlyEodInstruction(RoseProgram &prog) {
DEBUG_PRINTF("only at eod\n");
const auto *end_inst = prog.end_instruction();
- prog.add_before_end(std::make_unique<RoseInstrCheckOnlyEod>(end_inst));
+ prog.add_before_end(std::make_unique<RoseInstrCheckOnlyEod>(end_inst));
}
static
@@ -2071,7 +2071,7 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build,
program.add_before_end(move(eod_program));
}
-/** Makes a program for a role/vertex given a specific pred/in_edge. */
+/** Makes a program for a role/vertex given a specific pred/in_edge. */
static
RoseProgram makeRoleProgram(const RoseBuildImpl &build,
const map<RoseVertex, left_build_info> &leftfix_info,
@@ -2164,7 +2164,7 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id,
DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
assert(info.group_mask);
/* Note: group_mask is negated. */
- prog.add_before_end(std::make_unique<RoseInstrSquashGroups>(~info.group_mask));
+ prog.add_before_end(std::make_unique<RoseInstrSquashGroups>(~info.group_mask));
}
namespace {
@@ -2209,7 +2209,7 @@ RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) {
* only set if a state has been. */
if (!prog.empty() && reads_work_done_flag(block)) {
RoseProgram clear_block;
- clear_block.add_before_end(std::make_unique<RoseInstrClearWorkDone>());
+ clear_block.add_before_end(std::make_unique<RoseInstrClearWorkDone>());
prog.add_block(move(clear_block));
}
@@ -2279,7 +2279,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
}
if (lit_id == build.eod_event_literal_id) {
- /* Note: does not require the lit initial program */
+ /* Note: does not require the lit initial program */
assert(build.eod_event_literal_id != MO_INVALID_IDX);
return role_programs;
}
@@ -2369,7 +2369,7 @@ void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup,
return;
}
- program.add_before_end(std::make_unique<RoseInstrCatchUpMpv>());
+ program.add_before_end(std::make_unique<RoseInstrCatchUpMpv>());
}
RoseProgram makeReportProgram(const RoseBuildImpl &build,
@@ -2402,7 +2402,7 @@ RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
void addIncludedJumpProgram(RoseProgram &program, u32 child_offset,
u8 squash) {
RoseProgram block;
- block.add_before_end(std::make_unique<RoseInstrIncludedJump>(child_offset,
+ block.add_before_end(std::make_unique<RoseInstrIncludedJump>(child_offset,
squash));
program.add_block(move(block));
}