diff options
author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300 |
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) | |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfa/dfa_min.cpp | |
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) | |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa/dfa_min.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfa/dfa_min.cpp | 230 |
1 files changed, 115 insertions, 115 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp index 1a07e8a7d3..68b7680b78 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp +++ b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,14 +26,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Build code for DFA minimization. - */ +/** + * \file + * \brief Build code for DFA minimization. + */ /** - * /Summary of the Hopcroft minimisation algorithm/ - * + * /Summary of the Hopcroft minimisation algorithm/ + * * partition := {F, Q \ F}; * work_queue := {F}; * while (work_queue is not empty) do @@ -59,19 +59,19 @@ #include "dfa_min.h" #include "grey.h" -#include "mcclellancompile_util.h" -#include "rdfa.h" +#include "mcclellancompile_util.h" +#include "rdfa.h" #include "ue2common.h" -#include "util/container.h" -#include "util/flat_containers.h" -#include "util/noncopyable.h" +#include "util/container.h" +#include "util/flat_containers.h" +#include "util/noncopyable.h" #include "util/partitioned_set.h" #include <algorithm> #include <functional> -#include <iterator> +#include <iterator> #include <map> -#include <queue> +#include <queue> #include <set> #include <vector> @@ -82,77 +82,77 @@ namespace ue2 { namespace { struct hopcroft_state_info { - explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} - - /** \brief Mapping from symbol to a list of predecessors that transition to - * this state on that symbol. */ - vector<vector<dstate_id_t>> prev; + explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} + + /** \brief Mapping from symbol to a list of predecessors that transition to + * this state on that symbol. */ + vector<vector<dstate_id_t>> prev; }; -struct HopcroftInfo : noncopyable { - size_t alpha_size; //!< Size of DFA alphabet. - queue<size_t> work_queue; //!< Hopcroft work queue of partition indices. - partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states. - vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds) +struct HopcroftInfo : noncopyable { + size_t alpha_size; //!< Size of DFA alphabet. + queue<size_t> work_queue; //!< Hopcroft work queue of partition indices. + partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states. + vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds) - explicit HopcroftInfo(const raw_dfa &rdfa); + explicit HopcroftInfo(const raw_dfa &rdfa); }; -} // namespace +} // namespace /** - * \brief Create an initial partitioning and work_queue. + * \brief Create an initial partitioning and work_queue. * - * Initial partition contains {accepting states..., Non-accepting states} - * Initial work_queue contains accepting state subsets + * Initial partition contains {accepting states..., Non-accepting states} + * Initial work_queue contains accepting state subsets * - * The initial partitioning needs to distinguish between the different - * reporting behaviours (unlike standard Hopcroft) --> more than one subset - * possible for the accepting states. - * - * Look for accepting states in both reports and reports_eod. - * Creates a map with a key(reports, reports_eod) and an id. - * Reports of each state are searched against the map and - * added to the corresponding id -> partition[id] and work_queue[id]. - * Non Accept states are added to partition[id+1]. + * The initial partitioning needs to distinguish between the different + * reporting behaviours (unlike standard Hopcroft) --> more than one subset + * possible for the accepting states. + * + * Look for accepting states in both reports and reports_eod. + * Creates a map with a key(reports, reports_eod) and an id. + * Reports of each state are searched against the map and + * added to the corresponding id -> partition[id] and work_queue[id]. + * Non Accept states are added to partition[id+1]. */ static -vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) { +vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) { using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>; map<ReportKey, size_t> subset_map; vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET); for (size_t i = 0; i < rdfa.states.size(); i++) { - const auto &ds = rdfa.states[i]; - if (!ds.reports.empty() || !ds.reports_eod.empty()) { - ReportKey key(ds.reports, ds.reports_eod); + const auto &ds = rdfa.states[i]; + if (!ds.reports.empty() || !ds.reports_eod.empty()) { + ReportKey key(ds.reports, ds.reports_eod); if (contains(subset_map, key)) { state_to_subset[i] = subset_map[key]; } else { size_t sub = subset_map.size(); - subset_map.emplace(std::move(key), sub); + subset_map.emplace(std::move(key), sub); state_to_subset[i] = sub; - work_queue.push(sub); + work_queue.push(sub); } } } - /* Give non-accept states their own subset. */ + /* Give non-accept states their own subset. */ size_t non_accept_sub = subset_map.size(); - replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, - non_accept_sub); + replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, + non_accept_sub); return state_to_subset; } -HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) - : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), - states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { - /* Construct predecessor lists for each state, indexed by symbol. */ - for (size_t i = 0; i < states.size(); i++) { // i is the previous state - for (size_t sym = 0; sym < alpha_size; sym++) { - dstate_id_t present_state = rdfa.states[i].next[sym]; - states[present_state].prev[sym].push_back(i); +HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) + : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), + states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { + /* Construct predecessor lists for each state, indexed by symbol. */ + for (size_t i = 0; i < states.size(); i++) { // i is the previous state + for (size_t sym = 0; sym < alpha_size; sym++) { + dstate_id_t present_state = rdfa.states[i].next[sym]; + states[present_state].prev[sym].push_back(i); } } } @@ -170,14 +170,14 @@ HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) * - replace S in work_queue by the smaller of the two sets. */ static -void split_and_replace_set(const size_t part_index, HopcroftInfo &info, - const flat_set<dstate_id_t> &splitter) { +void split_and_replace_set(const size_t part_index, HopcroftInfo &info, + const flat_set<dstate_id_t> &splitter) { /* singleton sets cannot be split */ - if (info.partition[part_index].size() == 1) { + if (info.partition[part_index].size() == 1) { return; } - size_t small_index = info.partition.split(part_index, splitter); + size_t small_index = info.partition.split(part_index, splitter); if (small_index == INVALID_SUBSET) { /* the set could not be split */ @@ -187,56 +187,56 @@ void split_and_replace_set(const size_t part_index, HopcroftInfo &info, /* larger subset remains at the input subset index, if the input subset was * already in the work queue then the larger subset will remain there. */ - info.work_queue.push(small_index); + info.work_queue.push(small_index); } /** - * \brief Core of the Hopcroft minimisation algorithm. + * \brief Core of the Hopcroft minimisation algorithm. */ static -void dfa_min(HopcroftInfo &info) { - flat_set<dstate_id_t> curr, sym_preds; +void dfa_min(HopcroftInfo &info) { + flat_set<dstate_id_t> curr, sym_preds; vector<size_t> cand_subsets; - while (!info.work_queue.empty()) { - /* Choose and remove a set of states (curr, or A in the description - * above) from the work queue. Note that we copy the set because the - * partition may be split by the loop below. */ - curr.clear(); - insert(&curr, info.partition[info.work_queue.front()]); - info.work_queue.pop(); - - for (size_t sym = 0; sym < info.alpha_size; sym++) { - /* Find the set of states sym_preds for which a transition on the - * given symbol leads to a state in curr. */ - sym_preds.clear(); - for (dstate_id_t s : curr) { - insert(&sym_preds, info.states[s].prev[sym]); - } - - if (sym_preds.empty()) { + while (!info.work_queue.empty()) { + /* Choose and remove a set of states (curr, or A in the description + * above) from the work queue. Note that we copy the set because the + * partition may be split by the loop below. */ + curr.clear(); + insert(&curr, info.partition[info.work_queue.front()]); + info.work_queue.pop(); + + for (size_t sym = 0; sym < info.alpha_size; sym++) { + /* Find the set of states sym_preds for which a transition on the + * given symbol leads to a state in curr. */ + sym_preds.clear(); + for (dstate_id_t s : curr) { + insert(&sym_preds, info.states[s].prev[sym]); + } + + if (sym_preds.empty()) { continue; } - /* we only need to consider subsets with at least one member in - * sym_preds for splitting */ + /* we only need to consider subsets with at least one member in + * sym_preds for splitting */ cand_subsets.clear(); - info.partition.find_overlapping(sym_preds, &cand_subsets); + info.partition.find_overlapping(sym_preds, &cand_subsets); for (size_t sub : cand_subsets) { - split_and_replace_set(sub, info, sym_preds); + split_and_replace_set(sub, info, sym_preds); } } } } /** - * \brief Build the new DFA state table. + * \brief Build the new DFA state table. */ static -void mapping_new_states(const HopcroftInfo &info, - vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { - const size_t num_partitions = info.partition.size(); +void mapping_new_states(const HopcroftInfo &info, + vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { + const size_t num_partitions = info.partition.size(); // Mapping from equiv class's first state to equiv class index. map<dstate_id_t, size_t> ordering; @@ -245,7 +245,7 @@ void mapping_new_states(const HopcroftInfo &info, vector<dstate_id_t> eq_state(num_partitions); for (size_t i = 0; i < num_partitions; i++) { - ordering[*info.partition[i].begin()] = i; + ordering[*info.partition[i].begin()] = i; } dstate_id_t new_id = 0; @@ -253,28 +253,28 @@ void mapping_new_states(const HopcroftInfo &info, eq_state[m.second] = new_id++; } - for (size_t t = 0; t < info.partition.size(); t++) { - for (dstate_id_t id : info.partition[t]) { + for (size_t t = 0; t < info.partition.size(); t++) { + for (dstate_id_t id : info.partition[t]) { old_to_new[id] = eq_state[t]; } } vector<dstate> new_states; new_states.reserve(num_partitions); - - for (const auto &m : ordering) { - new_states.push_back(rdfa.states[m.first]); + + for (const auto &m : ordering) { + new_states.push_back(rdfa.states[m.first]); } - rdfa.states = std::move(new_states); + rdfa.states = std::move(new_states); } static -void renumber_new_states(const HopcroftInfo &info, - const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { - for (size_t i = 0; i < info.partition.size(); i++) { - for (size_t sym = 0; sym < info.alpha_size; sym++) { - dstate_id_t output = rdfa.states[i].next[sym]; - rdfa.states[i].next[sym] = old_to_new[output]; +void renumber_new_states(const HopcroftInfo &info, + const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { + for (size_t i = 0; i < info.partition.size(); i++) { + for (size_t sym = 0; sym < info.alpha_size; sym++) { + dstate_id_t output = rdfa.states[i].next[sym]; + rdfa.states[i].next[sym] = old_to_new[output]; } dstate_id_t dad = rdfa.states[i].daddy; rdfa.states[i].daddy = old_to_new[dad]; @@ -285,14 +285,14 @@ void renumber_new_states(const HopcroftInfo &info, } static -void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { - if (info.partition.size() == info.states.size()) { - return; +void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { + if (info.partition.size() == info.states.size()) { + return; } - - vector<dstate_id_t> old_to_new(info.states.size()); - mapping_new_states(info, old_to_new, rdfa); - renumber_new_states(info, old_to_new, rdfa); + + vector<dstate_id_t> old_to_new(info.states.size()); + mapping_new_states(info, old_to_new, rdfa); + renumber_new_states(info, old_to_new, rdfa); } void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { @@ -300,16 +300,16 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { return; } - if (is_dead(rdfa)) { - DEBUG_PRINTF("dfa is empty\n"); - } - + if (is_dead(rdfa)) { + DEBUG_PRINTF("dfa is empty\n"); + } + UNUSED const size_t states_before = rdfa.states.size(); - HopcroftInfo info(rdfa); + HopcroftInfo info(rdfa); - dfa_min(info); - new_dfa(rdfa, info); + dfa_min(info); + new_dfa(rdfa, info); DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before, rdfa.states.size()); |