diff options
author | thegeorg <thegeorg@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
commit | 49116032d905455a7b1c994e4a696afc885c1e71 (patch) | |
tree | be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/hyperscan/src/rose/rose_build_program.cpp | |
parent | 4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff) | |
download | ydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz |
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_program.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_program.cpp | 578 |
1 files changed, 289 insertions, 289 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp index 7672abe16f..7d1d7ecbb5 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -95,7 +95,7 @@ OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { } RoseProgram::RoseProgram() { - prog.push_back(std::make_unique<RoseInstrEnd>()); + prog.push_back(std::make_unique<RoseInstrEnd>()); } RoseProgram::~RoseProgram() = default; @@ -280,7 +280,7 @@ void stripCheckHandledInstruction(RoseProgram &prog) { } -/** Returns true if the program may read the interpreter's work_done flag */ +/** Returns true if the program may read the interpreter's work_done flag */ static bool reads_work_done_flag(const RoseProgram &prog) { for (const auto &ri : prog) { @@ -297,30 +297,30 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { } RoseProgram block; - block.add_before_end(std::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset)); + block.add_before_end(std::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset)); program.add_block(move(block)); } void addSuffixesEodProgram(RoseProgram &program) { RoseProgram block; - block.add_before_end(std::make_unique<RoseInstrSuffixesEod>()); + block.add_before_end(std::make_unique<RoseInstrSuffixesEod>()); program.add_block(move(block)); } void addMatcherEodProgram(RoseProgram &program) { RoseProgram block; - block.add_before_end(std::make_unique<RoseInstrMatcherEod>()); + block.add_before_end(std::make_unique<RoseInstrMatcherEod>()); program.add_block(move(block)); } -void addFlushCombinationProgram(RoseProgram &program) { - program.add_before_end(std::make_unique<RoseInstrFlushCombination>()); -} - -void addLastFlushCombinationProgram(RoseProgram &program) { - program.add_before_end(std::make_unique<RoseInstrLastFlushCombination>()); -} - +void addFlushCombinationProgram(RoseProgram &program) { + program.add_before_end(std::make_unique<RoseInstrFlushCombination>()); +} + +void addLastFlushCombinationProgram(RoseProgram &program) { + program.add_before_end(std::make_unique<RoseInstrLastFlushCombination>()); +} + static void makeRoleCheckLeftfix(const RoseBuildImpl &build, const map<RoseVertex, left_build_info> &leftfix_info, @@ -428,7 +428,7 @@ void makeCatchup(const ReportManager &rm, bool needs_catchup, return; } - program.add_before_end(std::make_unique<RoseInstrCatchUp>()); + program.add_before_end(std::make_unique<RoseInstrCatchUp>()); } static @@ -505,23 +505,23 @@ void writeSomOperation(const Report &report, som_operation *op) { } static -void addLogicalSetRequired(const Report &report, ReportManager &rm, - RoseProgram &program) { - if (report.lkey == INVALID_LKEY) { - return; - } - // set matching status of current lkey - auto risl = std::make_unique<RoseInstrSetLogical>(report.lkey, - report.offsetAdjust); - program.add_before_end(move(risl)); - // set current lkey's corresponding ckeys active, pending to check - for (auto ckey : rm.getRelateCKeys(report.lkey)) { - auto risc = std::make_unique<RoseInstrSetCombination>(ckey); - program.add_before_end(move(risc)); - } -} - -static +void addLogicalSetRequired(const Report &report, ReportManager &rm, + RoseProgram &program) { + if (report.lkey == INVALID_LKEY) { + return; + } + // set matching status of current lkey + auto risl = std::make_unique<RoseInstrSetLogical>(report.lkey, + report.offsetAdjust); + program.add_before_end(move(risl)); + // set current lkey's corresponding ckeys active, pending to check + for (auto ckey : rm.getRelateCKeys(report.lkey)) { + auto risc = std::make_unique<RoseInstrSetCombination>(ckey); + program.add_before_end(move(risc)); + } +} + +static void makeReport(const RoseBuildImpl &build, const ReportID id, const bool has_som, RoseProgram &program) { assert(id < build.rm.numReports()); @@ -562,67 +562,67 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, } if (report.quashSom) { - report_block.add_before_end(std::make_unique<RoseInstrSomZero>()); + report_block.add_before_end(std::make_unique<RoseInstrSomZero>()); } switch (report.type) { case EXTERNAL_CALLBACK: - if (build.rm.numCkeys()) { - addFlushCombinationProgram(report_block); - } + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } if (!has_som) { // Dedupe is only necessary if this report has a dkey, or if there // are SOM reports to catch up. bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { if (needs_dedupe) { - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrDedupeAndReport>( - report.quashSom, build.rm.getDkey(report), - report.onmatch, report.offsetAdjust, end_inst)); - } else { - makeDedupe(build.rm, report, report_block); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrDedupeAndReport>( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); + } else { + makeDedupe(build.rm, report, report_block); + } } else { - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrReport>( - report.onmatch, report.offsetAdjust)); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrReport>( + report.onmatch, report.offsetAdjust)); + } } } else { if (needs_dedupe) { makeDedupe(build.rm, report, report_block); } - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrReportExhaust>( - report.onmatch, report.offsetAdjust, report.ekey)); - } else { - report_block.add_before_end( - std::make_unique<RoseInstrSetExhaust>(report.ekey)); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrReportExhaust>( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + std::make_unique<RoseInstrSetExhaust>(report.ekey)); + } } } else { // has_som makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - if (!report.quiet) { - report_block.add_before_end(std::make_unique<RoseInstrReportSom>( - report.onmatch, report.offsetAdjust)); - } + if (!report.quiet) { + report_block.add_before_end(std::make_unique<RoseInstrReportSom>( + report.onmatch, report.offsetAdjust)); + } } else { - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrReportSomExhaust>( - report.onmatch, report.offsetAdjust, report.ekey)); - } else { - report_block.add_before_end( - std::make_unique<RoseInstrSetExhaust>(report.ekey)); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrReportSomExhaust>( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + std::make_unique<RoseInstrSetExhaust>(report.ekey)); + } } } - addLogicalSetRequired(report, build.rm, report_block); + addLogicalSetRequired(report, build.rm, report_block); break; case INTERNAL_SOM_LOC_SET: case INTERNAL_SOM_LOC_SET_IF_UNSET: @@ -635,9 +635,9 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_MAKE_WRITABLE: case INTERNAL_SOM_LOC_SET_FROM: case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - if (build.rm.numCkeys()) { - addFlushCombinationProgram(report_block); - } + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } if (has_som) { auto ri = std::make_unique<RoseInstrReportSomAware>(); writeSomOperation(report, &ri->som); @@ -649,7 +649,7 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, } break; case INTERNAL_ROSE_CHAIN: { - report_block.add_before_end(std::make_unique<RoseInstrReportChain>( + report_block.add_before_end(std::make_unique<RoseInstrReportChain>( report.onmatch, report.topSquashDistance)); break; } @@ -657,48 +657,48 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, case EXTERNAL_CALLBACK_SOM_STORED: case EXTERNAL_CALLBACK_SOM_ABS: case EXTERNAL_CALLBACK_SOM_REV_NFA: - if (build.rm.numCkeys()) { - addFlushCombinationProgram(report_block); - } + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - if (!report.quiet) { - report_block.add_before_end(std::make_unique<RoseInstrReportSom>( - report.onmatch, report.offsetAdjust)); - } + if (!report.quiet) { + report_block.add_before_end(std::make_unique<RoseInstrReportSom>( + report.onmatch, report.offsetAdjust)); + } } else { - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrReportSomExhaust>( - report.onmatch, report.offsetAdjust, report.ekey)); - } else { - report_block.add_before_end( - std::make_unique<RoseInstrSetExhaust>(report.ekey)); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrReportSomExhaust>( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + std::make_unique<RoseInstrSetExhaust>(report.ekey)); + } } - addLogicalSetRequired(report, build.rm, report_block); + addLogicalSetRequired(report, build.rm, report_block); break; case EXTERNAL_CALLBACK_SOM_PASS: - if (build.rm.numCkeys()) { - addFlushCombinationProgram(report_block); - } + if (build.rm.numCkeys()) { + addFlushCombinationProgram(report_block); + } makeDedupeSom(build.rm, report, report_block); if (report.ekey == INVALID_EKEY) { - if (!report.quiet) { - report_block.add_before_end(std::make_unique<RoseInstrReportSom>( - report.onmatch, report.offsetAdjust)); - } + if (!report.quiet) { + report_block.add_before_end(std::make_unique<RoseInstrReportSom>( + report.onmatch, report.offsetAdjust)); + } } else { - if (!report.quiet) { - report_block.add_before_end( - std::make_unique<RoseInstrReportSomExhaust>( - report.onmatch, report.offsetAdjust, report.ekey)); - } else { - report_block.add_before_end( - std::make_unique<RoseInstrSetExhaust>(report.ekey)); - } + if (!report.quiet) { + report_block.add_before_end( + std::make_unique<RoseInstrReportSomExhaust>( + report.onmatch, report.offsetAdjust, report.ekey)); + } else { + report_block.add_before_end( + std::make_unique<RoseInstrSetExhaust>(report.ekey)); + } } - addLogicalSetRequired(report, build.rm, report_block); + addLogicalSetRequired(report, build.rm, report_block); break; default: @@ -748,7 +748,7 @@ void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices, if (it == end(roleStateIndices)) { return; } - program.add_before_end(std::make_unique<RoseInstrSetState>(it->second)); + program.add_before_end(std::make_unique<RoseInstrSetState>(it->second)); } static @@ -772,7 +772,7 @@ void makePushDelayedInstructions(const RoseLiteralMap &literals, }); for (const auto &ri : delay_instructions) { - program.add_before_end(std::make_unique<RoseInstrPushDelayed>(ri)); + program.add_before_end(std::make_unique<RoseInstrPushDelayed>(ri)); } } @@ -924,7 +924,7 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, return; } - program.add_before_end(std::make_unique<RoseInstrSetGroups>(groups)); + program.add_before_end(std::make_unique<RoseInstrSetGroups>(groups)); } static @@ -1061,49 +1061,49 @@ bool makeRoleMask32(const vector<LookEntry> &look, return true; } -static -bool makeRoleMask64(const vector<LookEntry> &look, - RoseProgram &program, const target_t &target) { - if (!target.has_avx512()) { - return false; - } - - if (look.back().offset >= look.front().offset + 64) { - return false; - } - s32 base_offset = verify_s32(look.front().offset); - array<u8, 64> and_mask, cmp_mask; - and_mask.fill(0); - cmp_mask.fill(0); - u64a neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { - return false; - } - u32 shift = entry.offset - base_offset; - assert(shift < 64); - and_mask[shift] = andmask_u8; - cmp_mask[shift] = cmpmask_u8; - if (flip) { - neg_mask |= 1ULL << shift; - } - } - - DEBUG_PRINTF("and_mask %s\n", - convertMaskstoString(and_mask.data(), 64).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", - convertMaskstoString(cmp_mask.data(), 64).c_str()); - DEBUG_PRINTF("neg_mask %llx\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; -} - +static +bool makeRoleMask64(const vector<LookEntry> &look, + RoseProgram &program, const target_t &target) { + if (!target.has_avx512()) { + return false; + } + + if (look.back().offset >= look.front().offset + 64) { + return false; + } + s32 base_offset = verify_s32(look.front().offset); + array<u8, 64> and_mask, cmp_mask; + and_mask.fill(0); + cmp_mask.fill(0); + u64a neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { + return false; + } + u32 shift = entry.offset - base_offset; + assert(shift < 64); + and_mask[shift] = andmask_u8; + cmp_mask[shift] = cmpmask_u8; + if (flip) { + neg_mask |= 1ULL << shift; + } + } + + DEBUG_PRINTF("and_mask %s\n", + convertMaskstoString(and_mask.data(), 64).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", + convertMaskstoString(cmp_mask.data(), 64).c_str()); + DEBUG_PRINTF("neg_mask %llx\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrCheckMask64>(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); + return true; +} + // Sorting by the size of every bucket. // Used in map<u32, vector<s8>, cmpNibble>. struct cmpNibble { @@ -1127,7 +1127,7 @@ void getAllBuckets(const vector<LookEntry> &look, } else { neg_mask ^= 1ULL << (entry.offset - base_offset); } - + map <u16, u16> lo2hi; // We treat Ascii Table as a 16x16 grid. // Push every row in cr into lo2hi and mark the row number. @@ -1281,7 +1281,7 @@ makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, (hi_mask, lo_mask, bucket_select_mask_32, neg_mask & 0xffff, base_offset, end_inst); } - + static unique_ptr<RoseInstruction> makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, @@ -1300,83 +1300,83 @@ makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, } static -unique_ptr<RoseInstruction> -makeCheckShufti64x8(u32 offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 64> &bucket_select_mask, - u64a neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 64 || bucket_idx > 8) { - return nullptr; - } - - array<u8, 64> hi_mask_64; - array<u8, 64> lo_mask_64; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 16); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 32); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 48); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 16); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 32); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 48); - - return std::make_unique<RoseInstrCheckShufti64x8> - (hi_mask_64, lo_mask_64, bucket_select_mask, - neg_mask, base_offset, end_inst); -} - -static -unique_ptr<RoseInstruction> -makeCheckShufti64x16(u32 offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 64> &bucket_select_mask_lo, - const array<u8, 64> &bucket_select_mask_hi, - u64a neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 64 || bucket_idx > 16) { - return nullptr; - } - - array<u8, 64> hi_mask_1; - array<u8, 64> hi_mask_2; - array<u8, 64> lo_mask_1; - array<u8, 64> lo_mask_2; - - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32); - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48); - copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin()); - copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16); - copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32); - copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48); - - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48); - copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin()); - copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16); - copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32); - copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48); - - return std::make_unique<RoseInstrCheckShufti64x16> - (hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi, - bucket_select_mask_lo, neg_mask, base_offset, end_inst); -} - -static -bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, - const target_t &target) { - s32 offset_limit; - if (target.has_avx512()) { - offset_limit = 64; - } else { - offset_limit = 32; - } +unique_ptr<RoseInstruction> +makeCheckShufti64x8(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 64> &bucket_select_mask, + u64a neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 64 || bucket_idx > 8) { + return nullptr; + } + + array<u8, 64> hi_mask_64; + array<u8, 64> lo_mask_64; + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 16); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 32); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin() + 48); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 16); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 32); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_64.begin() + 48); + + return std::make_unique<RoseInstrCheckShufti64x8> + (hi_mask_64, lo_mask_64, bucket_select_mask, + neg_mask, base_offset, end_inst); +} + +static +unique_ptr<RoseInstruction> +makeCheckShufti64x16(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 64> &bucket_select_mask_lo, + const array<u8, 64> &bucket_select_mask_hi, + u64a neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 64 || bucket_idx > 16) { + return nullptr; + } + + array<u8, 64> hi_mask_1; + array<u8, 64> hi_mask_2; + array<u8, 64> lo_mask_1; + array<u8, 64> lo_mask_2; + + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 16); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 32); + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_1.begin() + 48); + copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin()); + copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 16); + copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 32); + copy(hi_mask.begin() + 16, hi_mask.begin() + 32, hi_mask_2.begin() + 48); + + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 16); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 32); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_1.begin() + 48); + copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin()); + copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 16); + copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 32); + copy(lo_mask.begin() + 16, lo_mask.begin() + 32, lo_mask_2.begin() + 48); + + return std::make_unique<RoseInstrCheckShufti64x16> + (hi_mask_1, hi_mask_2, lo_mask_1, lo_mask_2, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, base_offset, end_inst); +} + +static +bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, + const target_t &target) { + s32 offset_limit; + if (target.has_avx512()) { + offset_limit = 64; + } else { + offset_limit = 32; + } s32 base_offset = verify_s32(look.front().offset); - if (look.back().offset >= base_offset + offset_limit) { + if (look.back().offset >= base_offset + offset_limit) { return false; } @@ -1384,40 +1384,40 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, u64a neg_mask_64; array<u8, 32> hi_mask; array<u8, 32> lo_mask; - array<u8, 64> bucket_select_hi_64; // for AVX512 - array<u8, 64> bucket_select_lo_64; // for AVX512 + array<u8, 64> bucket_select_hi_64; // for AVX512 + array<u8, 64> bucket_select_lo_64; // for AVX512 array<u8, 32> bucket_select_hi; array<u8, 32> bucket_select_lo; hi_mask.fill(0); lo_mask.fill(0); - bucket_select_hi_64.fill(0); - bucket_select_lo_64.fill(0); + bucket_select_hi_64.fill(0); + bucket_select_lo_64.fill(0); bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. bucket_select_lo.fill(0); - if (target.has_avx512()) { - if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(), - bucket_select_lo_64.data(), neg_mask_64, bucket_idx, - 32)) { - return false; - } - copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32, - bucket_select_hi.begin()); - copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32, - bucket_select_lo.begin()); - - DEBUG_PRINTF("bucket_select_hi_64 %s\n", - convertMaskstoString(bucket_select_hi_64.data(), 64).c_str()); - DEBUG_PRINTF("bucket_select_lo_64 %s\n", - convertMaskstoString(bucket_select_lo_64.data(), 64).c_str()); - } else { - if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask_64, bucket_idx, - 32)) { - return false; - } - } - + if (target.has_avx512()) { + if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(), + bucket_select_lo_64.data(), neg_mask_64, bucket_idx, + 32)) { + return false; + } + copy(bucket_select_hi_64.begin(), bucket_select_hi_64.begin() + 32, + bucket_select_hi.begin()); + copy(bucket_select_lo_64.begin(), bucket_select_lo_64.begin() + 32, + bucket_select_lo.begin()); + + DEBUG_PRINTF("bucket_select_hi_64 %s\n", + convertMaskstoString(bucket_select_hi_64.data(), 64).c_str()); + DEBUG_PRINTF("bucket_select_lo_64 %s\n", + convertMaskstoString(bucket_select_lo_64.data(), 64).c_str()); + } else { + if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), + bucket_select_lo.data(), neg_mask_64, bucket_idx, + 32)) { + return false; + } + } + u32 neg_mask = (u32)neg_mask_64; DEBUG_PRINTF("hi_mask %s\n", @@ -1440,13 +1440,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, bucket_select_lo, neg_mask, base_offset, end_inst); } - if (target.has_avx512()) { - if (!ri) { - ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo_64, neg_mask_64, - base_offset, end_inst); - } - } + if (target.has_avx512()) { + if (!ri) { + ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo_64, neg_mask_64, + base_offset, end_inst); + } + } if (!ri) { ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, bucket_select_lo, bucket_select_hi, @@ -1457,13 +1457,13 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, bucket_select_lo, bucket_select_hi, neg_mask, base_offset, end_inst); } - if (target.has_avx512()) { - if (!ri) { - ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo_64, bucket_select_hi_64, - neg_mask_64, base_offset, end_inst); - } - } + if (target.has_avx512()) { + if (!ri) { + ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo_64, bucket_select_hi_64, + neg_mask_64, base_offset, end_inst); + } + } assert(ri); program.add_before_end(move(ri)); @@ -1476,7 +1476,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, */ static void makeLookaroundInstruction(const vector<LookEntry> &look, - RoseProgram &program, const target_t &target) { + RoseProgram &program, const target_t &target) { assert(!look.empty()); if (makeRoleByte(look, program)) { @@ -1500,14 +1500,14 @@ void makeLookaroundInstruction(const vector<LookEntry> &look, return; } - if (makeRoleMask64(look, program, target)) { + if (makeRoleMask64(look, program, target)) { + return; + } + + if (makeRoleShufti(look, program, target)) { return; } - if (makeRoleShufti(look, program, target)) { - return; - } - auto ri = std::make_unique<RoseInstrCheckLookaround>(look, program.end_instruction()); program.add_before_end(move(ri)); @@ -1545,7 +1545,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, return; // all caseful chars handled by HWLM mask. } - makeLookaroundInstruction(look, program, build.cc.target_info); + makeLookaroundInstruction(look, program, build.cc.target_info); } static @@ -1584,7 +1584,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); const auto *end = prog.end_instruction(); - prog.add_before_end(std::make_unique<RoseInstrCheckLitEarly>(min_offset, end)); + prog.add_before_end(std::make_unique<RoseInstrCheckLitEarly>(min_offset, end)); } static @@ -1595,7 +1595,7 @@ void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, if (!info.group_mask) { return; } - prog.add_before_end(std::make_unique<RoseInstrCheckGroups>(info.group_mask)); + prog.add_before_end(std::make_unique<RoseInstrCheckGroups>(info.group_mask)); } static @@ -1889,7 +1889,7 @@ void makeRoleLookaround(const RoseBuildImpl &build, findLookaroundMasks(build, v, look_more); mergeLookaround(look, look_more); if (!look.empty()) { - makeLookaroundInstruction(look, program, build.cc.target_info); + makeLookaroundInstruction(look, program, build.cc.target_info); } return; } @@ -1932,7 +1932,7 @@ void makeRoleSuffix(const RoseBuildImpl &build, event = MQE_TOP; } - prog.add_before_end(std::make_unique<RoseInstrTriggerSuffix>(queue, event)); + prog.add_before_end(std::make_unique<RoseInstrTriggerSuffix>(queue, event)); } static @@ -2039,7 +2039,7 @@ static void addCheckOnlyEodInstruction(RoseProgram &prog) { DEBUG_PRINTF("only at eod\n"); const auto *end_inst = prog.end_instruction(); - prog.add_before_end(std::make_unique<RoseInstrCheckOnlyEod>(end_inst)); + prog.add_before_end(std::make_unique<RoseInstrCheckOnlyEod>(end_inst)); } static @@ -2071,7 +2071,7 @@ void makeRoleEagerEodReports(const RoseBuildImpl &build, program.add_before_end(move(eod_program)); } -/** Makes a program for a role/vertex given a specific pred/in_edge. */ +/** Makes a program for a role/vertex given a specific pred/in_edge. */ static RoseProgram makeRoleProgram(const RoseBuildImpl &build, const map<RoseVertex, left_build_info> &leftfix_info, @@ -2164,7 +2164,7 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask); assert(info.group_mask); /* Note: group_mask is negated. */ - prog.add_before_end(std::make_unique<RoseInstrSquashGroups>(~info.group_mask)); + prog.add_before_end(std::make_unique<RoseInstrSquashGroups>(~info.group_mask)); } namespace { @@ -2209,7 +2209,7 @@ RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) { * only set if a state has been. */ if (!prog.empty() && reads_work_done_flag(block)) { RoseProgram clear_block; - clear_block.add_before_end(std::make_unique<RoseInstrClearWorkDone>()); + clear_block.add_before_end(std::make_unique<RoseInstrClearWorkDone>()); prog.add_block(move(clear_block)); } @@ -2279,7 +2279,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, } if (lit_id == build.eod_event_literal_id) { - /* Note: does not require the lit initial program */ + /* Note: does not require the lit initial program */ assert(build.eod_event_literal_id != MO_INVALID_IDX); return role_programs; } @@ -2369,7 +2369,7 @@ void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, return; } - program.add_before_end(std::make_unique<RoseInstrCatchUpMpv>()); + program.add_before_end(std::make_unique<RoseInstrCatchUpMpv>()); } RoseProgram makeReportProgram(const RoseBuildImpl &build, @@ -2402,7 +2402,7 @@ RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash) { RoseProgram block; - block.add_before_end(std::make_unique<RoseInstrIncludedJump>(child_offset, + block.add_before_end(std::make_unique<RoseInstrIncludedJump>(child_offset, squash)); program.add_block(move(block)); } |